# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import gobject
+import gtk
import os.path
import glob
-import re
import codecs
import urllib2
import traceback
import itertools
+from StringIO import StringIO
import cachefile
import idxfile
import misc
-from http_sub import HTTPRedirectHandler302
+import config
+from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
+import network_manager
BOARD_DATA_INVALID_VALUE = 0
-REG_EXPR = re.compile("(?P<id>.*).dat<>(?P<title>.*)\((?P<res>\d*)\)")
+class NothingToDoException: pass
+
+
+def accumulate(iterable, initial_value=0):
+ sum_value = initial_value
+ for value in iterable:
+ sum_value += value
+ yield sum_value
+
+def follow(iterable, under_value=0):
+ before = under_value
+ for item in iterable:
+ yield before, item
+ before = item
+
class BoardData:
def __init__(self, bbs_type):
self.bbs_type = bbs_type
+ self.lastmod = ""
def set_status(self, text):
pass
+ def set_fraction(self, fraction):
+ pass
+
def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
average = 0
if lastmod != 0:
if dur == 0:
average = 999999
else:
- average = (res * 60 * 60 * 24 / dur)
+ average = round(res * 60 * 60 * 24.0 / dur, 2)
if id in datalist:
item = datalist[id]
- item["num"] = num
- item["title"] = title
- item["res"] = res
- item["average"] = average
+ if item["num"]:
+ # The id is already in datalist with a non-zero num, so this
+ # thread is listed more than once in subject.txt.
+ # Ignore the second occurrence.
+ pass
+ else:
+ item["num"] = num
+ item["title"] = title
+ item["res"] = res
+ item["average"] = average
else:
- datalist[id] = {"num": num, "title": title,
+ datalist[id] = {"id": id, "num": num, "title": title,
"res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
- "lastModified": "", "average": average}
+ "lastModified": 0, "average": average, "oldRes": 0}
def merge_local_subjecttxt(self, datalist):
- f = lambda id, title, res, num, lastmod: \
+ for id, title, res, num, lastmod in self._load_subjecttxt():
self._merge_new_thread(datalist, id, title, res, num, lastmod)
- self._load_subjecttxt(f)
+ yield
+ status = "Complete subject file."
+ lastmod = self.load_board_idx()
+ if lastmod:
+ self.lastmod = lastmod
+ status = "%s [%s]" % (status, lastmod)
+ self.set_status(status)
- def merge_remote_subjecttxt(self, datalist):
- f = lambda id, title, res, num, lastmod: \
- self._merge_new_thread(datalist, id, title, res, num, lastmod)
- self._get_subjecttxt(f)
+ def merge_remote_subjecttxt(self, datalist, iterable):
+ for id, title, res, num, lastmod in iterable:
+ yield self._merge_new_thread(datalist, id, title, res, num, lastmod)
- def _add_idx(self, datalist, id, dic):
- datalist[id] = dic
+ def _init_extra_data(self, dic):
dic["num"] = 0
dic["res"] = 0
dic["average"] = 0
-
- def load_idxfiles(self):
- datalist = {}
-
- def on_load_record(id, metadata_dic):
- idxfile_path = misc.get_thread_idx_path(
- self.bbs_type.bbs_type, self.bbs_type.board, id)
- if os.path.exists(idxfile_path):
- self._add_idx(datalist, id, metadata_dic)
-
- print "load_cache"
- cachefile.load_cache(
- self.bbs_type.bbs_type, self.bbs_type.board, on_load_record)
- print "load_idx"
- self._load_modified_idxfiles(datalist)
- print "save_cache"
- cachefile.save_cache(
- self.bbs_type.bbs_type, self.bbs_type.board, datalist)
-
- return datalist
+ dic["oldRes"] = 0
+ return dic
+
+ def _progressing(self, iterable):
+ for before, fraction in follow(iterable):
+ if int(before*10) != int(fraction*10):
+ self.set_fraction(fraction)
+ yield fraction
+
+ def _modify_dict(self, item_dict):
+ # Convert lastModified from an HTTP-date string to seconds (0 if invalid).
+ httpdate = item_dict["lastModified"]
+ try:
+ secs = misc.httpdate_to_secs(httpdate)
+ except ValueError:
+ item_dict["lastModified"] = 0
+ else:
+ item_dict["lastModified"] = secs
+ return item_dict
+
+ def load_idxfiles(self, datalist):
+ try:
+ for i in self._load_cache(datalist):
+ yield
+ except IOError:
+ # the ".cache" file does not exist.
+ pass
+ else:
+ self.set_status("Complete load cache.")
+
+ for i in self._load_modified_idxfiles(datalist):
+ yield
+
+ self.set_status("Complete load idx files.")
+
+ self._save_cache(datalist)
+ # do not wait to save
+
+ # Adjust the entries after the cache is saved and before subject.txt is loaded.
+ iterable = datalist.itervalues()
+ iterable = itertools.imap(self._modify_dict, iterable)
+ for i in iterable:
+ yield
+
+ def _load_cache(self, datalist):
+ try:
+ total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
+ except OSError:
+ total = -1
+
+ iterable = file(misc.get_board_cache_path(self.bbs_type))
+
+ # split
+ iterable_dic, iterable_line = itertools.tee(iterable)
+
+ iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
+ iterable_dic = cachefile.formatted_to_dict(iterable_dic)
+
+ iterable_line = itertools.imap(lambda x :len(x), iterable_line)
+ iterable_line = accumulate(iterable_line)
+ iterable_line = itertools.imap(
+ lambda value: float(value) / total / 5 * 2, iterable_line)
+ iterable_line = self._progressing(iterable_line)
+
+ # union
+ iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
+
+ iterable = itertools.imap(self._init_extra_data, iterable)
+
+ for dic in iterable:
+ datalist[dic["id"]] = dic
+ yield
def _load_modified_idxfiles(self, datalist):
- basedir = misc.get_thread_idx_dir_path(
- self.bbs_type.bbs_type, self.bbs_type.board)
- if os.path.isdir(basedir):
- for idxfile_path in glob.glob(os.path.join(basedir, "*.idx")):
- thread_id, ext = os.path.splitext(
- os.path.basename(idxfile_path))
- idxlastModified = os.path.getmtime(idxfile_path)
- if thread_id not in datalist:
- print "new"
- dic = idxfile.load_idx(
- self.bbs_type.bbs_type, self.bbs_type.board, thread_id)
- #dic.pop("etag")
- dic["idxlastModified"] = idxlastModified
- self._add_idx(datalist, thread_id, dic)
- elif idxlastModified > datalist[thread_id]["idxlastModified"]:
- print "modified"
- datalist[thread_id]["idxlastModified"] = idxlastModified
- dic = idxfile.load_idx(
- self.bbs_type.bbs_type, self.bbs_type.board, thread_id)
- for name in idxfile.metadata_namelist:
- datalist[thread_id][name] = dic[name]
-
- def _split_record(self, line):
- m = REG_EXPR.match(line)
+ ext = ".idx"
+
+ def id_and_lastmod(file_path):
+ thread_id = os.path.basename(file_path)[:len(ext)*-1]
+ try:
+ idxlastModified = int(os.path.getmtime(file_path))
+ return thread_id, idxlastModified
+ except OSError:
+ pass
+
+ def _do_new_thread(thread_id, idxlastModified):
+ print "new", thread_id
+
+ dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
+ dic["id"] = thread_id
+ dic["idxlastModified"] = idxlastModified
+ dic = self._init_extra_data(dic)
+ datalist[thread_id] = dic
+ return thread_id, idxlastModified
+
+ def _do_modified_thread(thread_id, idxlastModified):
+ print "modified", thread_id
+
+ datalist[thread_id]["idxlastModified"] = idxlastModified
+ dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
+ for key, value in dic.iteritems():
+ datalist[thread_id][key] = value
+ return thread_id, idxlastModified
+
+ def new_or_modified_thread(thread_id, idxlastModified):
+ if thread_id not in datalist:
+ return _do_new_thread(thread_id, idxlastModified)
+ elif idxlastModified > datalist[thread_id]["idxlastModified"]:
+ return _do_modified_thread(thread_id, idxlastModified)
+ return thread_id, idxlastModified
+
+ basedir = misc.get_thread_idx_dir_path(self.bbs_type)
+
+ filelist = glob.glob(os.path.join(basedir, "*"+ext))
+ total = len(filelist)
+
+ iterable = filelist
+
+ # split
+ iterable, iterable_count = itertools.tee(iterable)
+
+ iterable_count = itertools.izip(itertools.count(1), iterable_count)
+ iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
+ iterable_count = itertools.imap(
+ lambda x: float(x)/total/10 + 0.4, iterable_count)
+ iterable_count = self._progressing(iterable_count)
+
+ # union
+ iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)
+
+ iterable = itertools.imap(id_and_lastmod, iterable)
+ iterable = itertools.ifilter(None, iterable)
+ iterable = itertools.starmap(new_or_modified_thread, iterable)
+
+ exist_key_set = set()
+ iterable = itertools.starmap(lambda x, y: exist_key_set.add(x),
+ iterable)
+
+ for i in iterable:
+ yield
+
+ # Delete entries from datalist whose idx file does not exist.
+ datalist_key_set = frozenset(datalist.iterkeys())
+ delete_key_set = datalist_key_set - exist_key_set
+ for key in delete_key_set:
+ del datalist[key]
+ print "del", key
+ yield
+
+ def _save_cache(self, datalist):
+ iterable = datalist.items()
+ iterable = cachefile.dict_to_formatted(iterable)
+ c_file = misc.FileWrap(misc.get_board_cache_path(self.bbs_type), "w")
+ misc.chain(c_file.write, c_file.close, iterable)
+
+ def _split_record(self, line_encoded):
+ line = line_encoded.decode(self.bbs_type.encoding, "replace")
+ m = self.bbs_type.subject_reg.match(line)
if m:
id = m.group("id")
title = m.group("title")
return id, title, res
return None
- def _load_subjecttxt(self, func):
+ def _load_subjecttxt(self):
lastmod = self.load_board_idx()
try:
lastmod = misc.httpdate_to_secs(lastmod)
except ValueError:
lastmod = 0
- subjecttxt_path = misc.get_board_subjecttxt_path(
- self.bbs_type.bbs_type, self.bbs_type.board)
+ subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
try:
- for num, line_encoded \
- in itertools.izip(itertools.count(1),
- file(subjecttxt_path)):
- result = self._split_record(
- line_encoded.decode("cp932", "replace"))
+ total = os.path.getsize(subjecttxt_path)
+ except OSError:
+ total = -1
+
+ iterable = file(subjecttxt_path)
+
+ # split
+ iterable, iterable_len = itertools.tee(iterable)
+
+ iterable_len = itertools.imap(lambda l: len(l), iterable_len)
+ iterable_len = accumulate(iterable_len)
+ iterable_len = itertools.imap(
+ lambda value: float(value) / total / 2 + 0.5, iterable_len)
+ iterable_len = self._progressing(iterable_len)
+
+ # union
+ iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
+
+ iterable = itertools.izip(itertools.count(1), iterable)
+
+ def main_process():
+ for num, line_encoded in iterable:
+ result = self._split_record(line_encoded)
if result:
id, title, res = result
- try:
- func(id, title, res, num, lastmod)
- except:
- traceback.print_exc()
- except IOError:
- traceback.print_exc()
+ yield id, title, res, num, lastmod
- def _get_subjecttxt(self, func):
+ return main_process()
- # get subject.txt
+ def get_subjecttxt(self, on_received):
+ uri = self.bbs_type.get_subject_txt_uri()
+ request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
+ request.add_header("User-agent", config.User_Agent)
+ if self.lastmod:
+ request.add_header("If-modified-since", self.lastmod)
- opener = urllib2.build_opener(HTTPRedirectHandler302)
try:
- response = opener.open(self.bbs_type.get_subject_txt_uri())
- except urllib2.HTTPError, e:
- print "%d %s" % (e.code, e.msg)
- gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
- print e.info()
- print "switch to local"
- self._load_subjecttxt(func)
- except urllib2.URLError, e:
- print e
- gobject.idle_add(self.set_status, str(e))
- print "switch to local"
- self._load_subjecttxt(func)
+ network_manager.request_get(uri, request.headers, on_received)
+ except network_manager.BusyException:
+ self.set_status("The network is busy. Try later.")
+ raise NothingToDoException()
else:
- status = "%d %s" % (response.code, response.msg)
- print status
- gobject.idle_add(self.set_status, status)
- info = response.info()
- print info
+ self.set_status("GET...")
- lastmod = 0
- if "Last-Modified" in info:
- _lastmod = info["Last-Modified"]
- self.save_board_idx(_lastmod)
- try:
- lastmod = misc.httpdate_to_secs(_lastmod)
- except ValueError:
- lastmod = 0
-
- subjecttxt_path = misc.get_board_subjecttxt_path(
- self.bbs_type.bbs_type, self.bbs_type.board)
- basedir = os.path.dirname(subjecttxt_path)
- if not os.path.isdir(basedir):
- os.makedirs(basedir)
- f = None
+ def progress_response(self, response):
+ status = response.status
+ headers = response.headers
+ message = StringIO(response.message)
+
+ if "last-modified".capitalize() in headers:
+ self.set_status("%s [%s]" % (status,
+ headers["last-modified".capitalize()]))
+ else:
+ self.set_status("%s" % status)
+
+ version, code, msg = status.split(None, 2)
+ code = int(code)
+ if code != 200:
+ raise misc.StopChainException()
+
+ lastmod = 0
+ if "last-modified".capitalize() in headers:
+ _lastmod = headers["last-modified".capitalize()]
+ self.lastmod = _lastmod
+ self.save_board_idx(_lastmod)
try:
- f = file(subjecttxt_path, "w")
- except IOError:
- traceback.print_exc()
+ lastmod = misc.httpdate_to_secs(_lastmod)
+ except ValueError:
+ lastmod = 0
+
+ subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
+ f = misc.FileWrap(subjecttxt_path, "w")
+ try:
+ total = int(headers["content-length".capitalize()])
+ except:
+ total = -1
+
+ def saving(line_encoded):
try:
- for num, line_encoded in itertools.izip(itertools.count(1),
- response):
- if f:
- try:
- f.write(line_encoded)
- except IOError:
- traceback.print_exc()
- result = self._split_record(
- line_encoded.decode("cp932", "replace"))
- if result:
- id, title, res = result
- try:
- func(id, title, res, num, lastmod)
- except:
- traceback.print_exc()
- except:
+ f.write(line_encoded)
+ except IOError:
traceback.print_exc()
+ return line_encoded
+
+ iterable = message
+
+ # split
+ iterable, iterable_len = itertools.tee(iterable)
- if f:
- f.close()
- f = None
+ iterable_len = itertools.imap(lambda l: len(l), iterable_len)
+ iterable_len = accumulate(iterable_len)
+ iterable_len = itertools.imap(
+ lambda value: float(value) / total, iterable_len)
+ iterable_len = self._progressing(iterable_len)
+
+ # union
+ iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
+
+ iterable = itertools.imap(saving, iterable)
+ iterable = itertools.izip(itertools.count(1), iterable)
+
+ for num, line_encoded in iterable:
+ result = self._split_record(line_encoded)
+ if result:
+ id, title, res = result
+ yield id, title, res, num, lastmod
+ f.close()
def load_board_idx(self):
lastmod = ""
- boardidxfile = misc.get_board_idx_path(
- self.bbs_type.bbs_type, self.bbs_type.board)
+ boardidxfile = misc.get_board_idx_path(self.bbs_type)
try:
for line in file(boardidxfile):
if line.startswith("lastModified="):
if not lastmod:
return
- boardidx_path = misc.get_board_idx_path(
- self.bbs_type.bbs_type, self.bbs_type.board)
+ boardidx_path = misc.get_board_idx_path(self.bbs_type)
basedir = os.path.dirname(boardidx_path)
if not os.path.isdir(basedir):
os.makedirs(basedir)