OSDN Git Service

Multithreading is abandoned. Alternatively, asyncore is used as a substitute. (#16776)
[fukui-no-namari/fukui-no-namari.git] / src / FukuiNoNamari / board_data.py
index b38b5c8..48cc21c 100644 (file)
 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 import gobject
+import gtk
 import os.path
 import glob
-import re
 import codecs
 import urllib2
 import traceback
 import itertools
+from StringIO import StringIO
 
 import cachefile
 import idxfile
 import misc
-from http_sub import HTTPRedirectHandler302
+import config
+from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
+import network_manager
 
 BOARD_DATA_INVALID_VALUE = 0
-REG_EXPR = re.compile("(?P<id>.*).dat<>(?P<title>.*)\((?P<res>\d*)\)")
 
 
+class NothingToDoException: pass
+
+
+def accumulate(iterable, initial_value=0):
+    sum_value = initial_value
+    for value in iterable:
+        sum_value += value
+        yield sum_value
+
+def follow(iterable, under_value=0):
+    before = under_value
+    for item in iterable:
+        yield before, item
+        before = item
+
 class BoardData:
 
     def __init__(self, bbs_type):
         self.bbs_type = bbs_type
+        self.lastmod = ""
 
     def set_status(self, text):
         pass
 
+    def set_fraction(self, fraction):
+        pass
+
     def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
         average = 0
         if lastmod != 0:
@@ -55,80 +76,201 @@ class BoardData:
                 if dur == 0:
                     average = 999999
                 else:
-                    average = (res * 60 * 60 * 24 / dur)
+                    average = round(res * 60 * 60 * 24.0 / dur, 2)
 
         if id in datalist:
             item = datalist[id]
-            item["num"] = num
-            item["title"] = title
-            item["res"] = res
-            item["average"] = average
+            if item["num"]:
+                # The thread already exists in datalist with a non-zero num,
+                # so it is listed more than once in subject.txt.
+                # Ignore the second occurrence.
+                pass
+            else:
+                item["num"] = num
+                item["title"] = title
+                item["res"] = res
+                item["average"] = average
         else:
-            datalist[id] = {"num": num, "title": title,
+            datalist[id] = {"id": id, "num": num, "title": title,
                             "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
-                            "lastModified": "", "average": average}
+                            "lastModified": 0, "average": average, "oldRes": 0}
 
     def merge_local_subjecttxt(self, datalist):
-        f = lambda id, title, res, num, lastmod: \
+        for id, title, res, num, lastmod in self._load_subjecttxt():
             self._merge_new_thread(datalist, id, title, res, num, lastmod)
-        self._load_subjecttxt(f)
+            yield
+        status = "Complete subject file."
+        lastmod = self.load_board_idx()
+        if lastmod:
+            self.lastmod = lastmod
+            status = "%s [%s]" % (status, lastmod)
+        self.set_status(status)
 
-    def merge_remote_subjecttxt(self, datalist):
-        f = lambda id, title, res, num, lastmod: \
-            self._merge_new_thread(datalist, id, title, res, num, lastmod)
-        self._get_subjecttxt(f)
+    def merge_remote_subjecttxt(self, datalist, iterable):
+        for id, title, res, num, lastmod in iterable:
+            yield self._merge_new_thread(datalist, id, title, res, num, lastmod)
 
-    def _add_idx(self, datalist, id, dic):
-        datalist[id] = dic
+    def _init_extra_data(self, dic):
         dic["num"] = 0
         dic["res"] = 0
         dic["average"] = 0
-        
-    def load_idxfiles(self):
-        datalist = {}
-
-        def on_load_record(id, metadata_dic):
-            idxfile_path = misc.get_thread_idx_path(
-                self.bbs_type.bbs_type, self.bbs_type.board, id)
-            if os.path.exists(idxfile_path):
-                self._add_idx(datalist, id, metadata_dic)
-
-        print "load_cache"
-        cachefile.load_cache(
-            self.bbs_type.bbs_type, self.bbs_type.board, on_load_record)
-        print "load_idx"
-        self._load_modified_idxfiles(datalist)
-        print "save_cache"
-        cachefile.save_cache(
-            self.bbs_type.bbs_type, self.bbs_type.board, datalist)
-
-        return datalist
+        dic["oldRes"] = 0
+        return dic
+
+    def _progressing(self, iterable):
+        for before, fraction in follow(iterable):
+            if int(before*10) != int(fraction*10):
+                self.set_fraction(fraction)
+            yield fraction
+
+    def _modify_dict(self, item_dict):
+        # convert lastModified from an HTTP date string to seconds
+        httpdate = item_dict["lastModified"]
+        try:
+            secs = misc.httpdate_to_secs(httpdate)
+        except ValueError:
+            item_dict["lastModified"] = 0
+        else:
+            item_dict["lastModified"] = secs
+        return item_dict
+
+    def load_idxfiles(self, datalist):
+        try:
+            for i in self._load_cache(datalist):
+                yield
+        except IOError:
+            # the ".cache" file does not exist.
+            pass
+        else:
+            self.set_status("Complete load cache.")
+
+        for i in self._load_modified_idxfiles(datalist):
+            yield
+
+        self.set_status("Complete load idx files.")
+
+        self._save_cache(datalist)
+        # do not wait to save
+
+        # adjust entries after saving the cache, before loading subject.txt
+        iterable = datalist.itervalues()
+        iterable = itertools.imap(self._modify_dict, iterable)
+        for i in iterable:
+            yield
+
+    def _load_cache(self, datalist):
+        try:
+            total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
+        except OSError:
+            total = -1
+
+        iterable = file(misc.get_board_cache_path(self.bbs_type))
+
+        # split
+        iterable_dic, iterable_line = itertools.tee(iterable)
+
+        iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
+        iterable_dic = cachefile.formatted_to_dict(iterable_dic)
+
+        iterable_line = itertools.imap(lambda x :len(x), iterable_line)
+        iterable_line = accumulate(iterable_line)
+        iterable_line = itertools.imap(
+            lambda value: float(value) / total / 5 * 2, iterable_line)
+        iterable_line = self._progressing(iterable_line)
+
+        # union
+        iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
+
+        iterable = itertools.imap(self._init_extra_data, iterable)
+
+        for dic in iterable:
+            datalist[dic["id"]] = dic
+            yield
 
     def _load_modified_idxfiles(self, datalist):
-        basedir = misc.get_thread_idx_dir_path(
-            self.bbs_type.bbs_type, self.bbs_type.board)
-        if os.path.isdir(basedir):
-            for idxfile_path in glob.glob(os.path.join(basedir, "*.idx")):
-                thread_id, ext = os.path.splitext(
-                    os.path.basename(idxfile_path))
-                idxlastModified = os.path.getmtime(idxfile_path)
-                if thread_id not in datalist:
-                    print "new"
-                    dic = idxfile.load_idx(
-                        self.bbs_type.bbs_type, self.bbs_type.board, thread_id)
-                    #dic.pop("etag")
-                    dic["idxlastModified"] = idxlastModified
-                    self._add_idx(datalist, thread_id, dic)
-                elif idxlastModified > datalist[thread_id]["idxlastModified"]:
-                    print "modified"
-                    datalist[thread_id]["idxlastModified"] = idxlastModified
-                    dic = idxfile.load_idx(
-                        self.bbs_type.bbs_type, self.bbs_type.board, thread_id)
-                    for name in idxfile.metadata_namelist:
-                        datalist[thread_id][name] = dic[name]
-
-    def _split_record(self, line):
-        m = REG_EXPR.match(line)
+        ext = ".idx"
+
+        def id_and_lastmod(file_path):
+            thread_id = os.path.basename(file_path)[:len(ext)*-1]
+            try:
+                idxlastModified = int(os.path.getmtime(file_path))
+                return thread_id, idxlastModified
+            except OSError:
+                pass
+
+        def _do_new_thread(thread_id, idxlastModified):
+            print "new", thread_id
+
+            dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
+            dic["id"] = thread_id
+            dic["idxlastModified"] = idxlastModified
+            dic = self._init_extra_data(dic)
+            datalist[thread_id] = dic
+            return thread_id, idxlastModified
+
+        def _do_modified_thread(thread_id, idxlastModified):
+            print "modified", thread_id
+
+            datalist[thread_id]["idxlastModified"] = idxlastModified
+            dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
+            for key, value in dic.iteritems():
+                datalist[thread_id][key] = value
+            return thread_id, idxlastModified
+
+        def new_or_modified_thread(thread_id, idxlastModified):
+            if thread_id not in datalist:
+                return _do_new_thread(thread_id, idxlastModified)
+            elif idxlastModified > datalist[thread_id]["idxlastModified"]:
+                return _do_modified_thread(thread_id, idxlastModified)
+            return thread_id, idxlastModified
+
+        basedir = misc.get_thread_idx_dir_path(self.bbs_type)
+
+        filelist = glob.glob(os.path.join(basedir, "*"+ext))
+        total = len(filelist)
+
+        iterable = filelist
+
+        # split
+        iterable, iterable_count = itertools.tee(iterable)
+
+        iterable_count = itertools.izip(itertools.count(1), iterable_count)
+        iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
+        iterable_count = itertools.imap(
+            lambda x: float(x)/total/10 + 0.4, iterable_count)
+        iterable_count = self._progressing(iterable_count)
+
+        # union
+        iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)
+
+        iterable = itertools.imap(id_and_lastmod, iterable)
+        iterable = itertools.ifilter(None, iterable)
+        iterable = itertools.starmap(new_or_modified_thread, iterable)
+
+        exist_key_set = set()
+        iterable = itertools.starmap(lambda x, y: exist_key_set.add(x),
+            iterable)
+
+        for i in iterable:
+            yield
+
+        # delete from datalist if idx file does not exist.
+        datalist_key_set = frozenset(datalist.iterkeys())
+        delete_key_set = datalist_key_set - exist_key_set
+        for key in delete_key_set:
+            del datalist[key]
+            print "del", key
+            yield
+
+    def _save_cache(self, datalist):
+        iterable = datalist.items()
+        iterable = cachefile.dict_to_formatted(iterable)
+        c_file = misc.FileWrap(misc.get_board_cache_path(self.bbs_type), "w")
+        misc.chain(c_file.write, c_file.close, iterable)
+
+    def _split_record(self, line_encoded):
+        line = line_encoded.decode(self.bbs_type.encoding, "replace")
+        m = self.bbs_type.subject_reg.match(line)
         if m:
             id = m.group("id")
             title = m.group("title")
@@ -139,102 +281,127 @@ class BoardData:
             return id, title, res
         return None
 
-    def _load_subjecttxt(self, func):
+    def _load_subjecttxt(self):
         lastmod = self.load_board_idx()
         try:
             lastmod = misc.httpdate_to_secs(lastmod)
         except ValueError:
             lastmod = 0
 
-        subjecttxt_path = misc.get_board_subjecttxt_path(
-            self.bbs_type.bbs_type, self.bbs_type.board)
+        subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
         try:
-            for num, line_encoded \
-                    in itertools.izip(itertools.count(1),
-                                      file(subjecttxt_path)):
-                result = self._split_record(
-                    line_encoded.decode("cp932", "replace"))
+            total = os.path.getsize(subjecttxt_path)
+        except OSError:
+            total = -1
+
+        iterable = file(subjecttxt_path)
+
+        # split
+        iterable, iterable_len = itertools.tee(iterable)
+
+        iterable_len = itertools.imap(lambda l: len(l), iterable_len)
+        iterable_len = accumulate(iterable_len)
+        iterable_len = itertools.imap(
+            lambda value: float(value) / total / 2 + 0.5, iterable_len)
+        iterable_len = self._progressing(iterable_len)
+
+        # union
+        iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
+
+        iterable = itertools.izip(itertools.count(1), iterable)
+
+        def main_process():
+            for num, line_encoded in iterable:
+                result = self._split_record(line_encoded)
                 if result:
                     id, title, res = result
-                    try:
-                        func(id, title, res, num, lastmod)
-                    except:
-                        traceback.print_exc()
-        except IOError:
-            traceback.print_exc()
+                    yield id, title, res, num, lastmod
 
-    def _get_subjecttxt(self, func):
+        return main_process()
 
-        # get subject.txt
+    def get_subjecttxt(self, on_received):
+        uri = self.bbs_type.get_subject_txt_uri()
+        request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
+        request.add_header("User-agent", config.User_Agent)
+        if self.lastmod:
+            request.add_header("If-modified-since", self.lastmod)
 
-        opener = urllib2.build_opener(HTTPRedirectHandler302)
         try:
-            response = opener.open(self.bbs_type.get_subject_txt_uri())
-        except urllib2.HTTPError, e:
-            print "%d %s" % (e.code, e.msg)
-            gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
-            print e.info()
-            print "switch to local"
-            self._load_subjecttxt(func)
-        except urllib2.URLError, e:
-            print e
-            gobject.idle_add(self.set_status, str(e))
-            print "switch to local"
-            self._load_subjecttxt(func)
+            network_manager.request_get(uri, request.headers, on_received)
+        except network_manager.BusyException:
+            self.set_status("The network is busy. Try later.")
+            raise NothingToDoException()
         else:
-            status = "%d %s" % (response.code, response.msg)
-            print status
-            gobject.idle_add(self.set_status, status)
-            info = response.info()
-            print info
+            self.set_status("GET...")
 
-            lastmod = 0
-            if "Last-Modified" in info:
-                _lastmod = info["Last-Modified"]
-                self.save_board_idx(_lastmod)
-                try:
-                    lastmod = misc.httpdate_to_secs(_lastmod)
-                except ValueError:
-                    lastmod = 0
-
-            subjecttxt_path = misc.get_board_subjecttxt_path(
-                self.bbs_type.bbs_type, self.bbs_type.board)
-            basedir = os.path.dirname(subjecttxt_path)
-            if not os.path.isdir(basedir):
-                os.makedirs(basedir)
-            f = None
+    def progress_response(self, response):
+        status = response.status
+        headers = response.headers
+        message = StringIO(response.message)
+
+        if "last-modified".capitalize() in headers:
+            self.set_status("%s [%s]" % (status,
+                headers["last-modified".capitalize()]))
+        else:
+            self.set_status("%s" % status)
+
+        version, code, msg = status.split(None, 2)
+        code = int(code)
+        if code != 200:
+            raise misc.StopChainException()
+
+        lastmod = 0
+        if "last-modified".capitalize() in headers:
+            _lastmod = headers["last-modified".capitalize()]
+            self.lastmod = _lastmod
+            self.save_board_idx(_lastmod)
             try:
-                f = file(subjecttxt_path, "w")
-            except IOError:
-                traceback.print_exc()
+                lastmod = misc.httpdate_to_secs(_lastmod)
+            except ValueError:
+                lastmod = 0
+
+        subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
+        f = misc.FileWrap(subjecttxt_path, "w")
 
+        try:
+            total = int(headers["content-length".capitalize()])
+        except:
+            total = -1
+
+        def saving(line_encoded):
             try:
-                for num, line_encoded in itertools.izip(itertools.count(1),
-                                                        response):
-                    if f:
-                        try:
-                            f.write(line_encoded)
-                        except IOError:
-                            traceback.print_exc()
-                    result = self._split_record(
-                        line_encoded.decode("cp932", "replace"))
-                    if result:
-                        id, title, res = result
-                        try:
-                            func(id, title, res, num, lastmod)
-                        except:
-                            traceback.print_exc()
-            except:
+                f.write(line_encoded)
+            except IOError:
                 traceback.print_exc()
+            return line_encoded
+
+        iterable = message
+
+        # split
+        iterable, iterable_len = itertools.tee(iterable)
 
-            if f:
-                f.close()
-                f = None
+        iterable_len = itertools.imap(lambda l: len(l), iterable_len)
+        iterable_len = accumulate(iterable_len)
+        iterable_len = itertools.imap(
+            lambda value: float(value) / total, iterable_len)
+        iterable_len = self._progressing(iterable_len)
+
+        # union
+        iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
+
+        iterable = itertools.imap(saving, iterable)
+        iterable = itertools.izip(itertools.count(1), iterable)
+
+        for num, line_encoded in iterable:
+            result = self._split_record(line_encoded)
+            if result:
+                id, title, res = result
+                yield id, title, res, num, lastmod
+        f.close()
 
     def load_board_idx(self):
         lastmod = ""
-        boardidxfile = misc.get_board_idx_path(
-            self.bbs_type.bbs_type, self.bbs_type.board)
+        boardidxfile = misc.get_board_idx_path(self.bbs_type)
         try:
             for line in file(boardidxfile):
                 if line.startswith("lastModified="):
@@ -248,8 +415,7 @@ class BoardData:
         if not lastmod:
             return
 
-        boardidx_path = misc.get_board_idx_path(
-            self.bbs_type.bbs_type, self.bbs_type.board)
+        boardidx_path = misc.get_board_idx_path(self.bbs_type)
         basedir = os.path.dirname(boardidx_path)
         if not os.path.isdir(basedir):
             os.makedirs(basedir)