1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
32 BOARD_DATA_INVALID_VALUE = 0
def __init__(self, bbs_type):
    """Bind this instance to a single board.

    bbs_type is stored unchanged as self.bbs_type; the other methods
    read it to resolve cache/idx/subject.txt paths, the text encoding,
    the subject line regex and the remote subject.txt URI.
    """
    self.bbs_type = bbs_type
40 def set_status(self, text):
43 def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
51 # guard against the Last-Modified time of subject.txt being equal to
52 # the build time of the thread (that would cause a zero division)
57 average = round(res * 60 * 60 * 24.0 / dur, 2)
62 # already exists in datalist and num is not 0, then this thread
63 # is duplicate in subject.txt.
70 item["average"] = average
72 datalist[id] = {"id": id, "num": num, "title": title,
73 "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
74 "lastModified": "", "average": average}
def merge_local_subjecttxt(self, datalist):
    """Merge the records of the subject.txt stored on disk into datalist.

    Every record handed out by self._load_subjecttxt is forwarded, with
    datalist as the merge target, to self._merge_new_thread.  datalist
    is modified in place; nothing is returned.
    """
    def merge_record(thread_id, thread_title, res_count, position, modified):
        # Same positional contract as the callback _load_subjecttxt expects:
        # (id, title, res, num, lastmod).
        return self._merge_new_thread(
            datalist, thread_id, thread_title, res_count, position, modified)

    self._load_subjecttxt(merge_record)
def merge_remote_subjecttxt(self, datalist):
    """Merge the records of the downloaded subject.txt into datalist.

    Every record handed out by self._get_subjecttxt is forwarded, with
    datalist as the merge target, to self._merge_new_thread.  datalist
    is modified in place; nothing is returned.
    """
    def merge_record(thread_id, thread_title, res_count, position, modified):
        # Same positional contract as the callback _get_subjecttxt expects:
        # (id, title, res, num, lastmod).
        return self._merge_new_thread(
            datalist, thread_id, thread_title, res_count, position, modified)

    self._get_subjecttxt(merge_record)
86 def _init_extra_data(self, dic):
92 def load_idxfiles(self):
94 iterable = cachefile.load_cache(self.bbs_type)
95 iterable = itertools.imap(self._init_extra_data, iterable)
96 datalist = dict([(dic["id"], dic) for dic in iterable])
99 self._load_modified_idxfiles(datalist)
101 cachefile.save_cache(self.bbs_type, datalist)
105 def _load_modified_idxfiles(self, datalist):
106 basedir = misc.get_thread_idx_dir_path(self.bbs_type)
108 exist_key_set = set()
109 if os.path.isdir(basedir):
110 for idxfile_path in glob.glob(os.path.join(basedir, "*"+ext)):
111 basename = os.path.basename(idxfile_path)
112 thread_id = basename[:len(ext)*-1]
114 idxlastModified = os.path.getmtime(idxfile_path)
117 exist_key_set.add(thread_id)
118 if thread_id not in datalist:
119 print "new", thread_id
120 bbs_type_for_thread = self.bbs_type.clone_with_thread(
122 dic = idxfile.load_idx(bbs_type_for_thread)
123 dic["id"] = thread_id
124 dic["idxlastModified"] = idxlastModified
125 dic = self._init_extra_data(dic)
126 datalist[thread_id] = dic
127 elif idxlastModified > datalist[thread_id]["idxlastModified"]:
128 print "modified", thread_id
129 bbs_type_for_thread = self.bbs_type.clone_with_thread(
131 datalist[thread_id]["idxlastModified"] = idxlastModified
132 dic = idxfile.load_idx(bbs_type_for_thread)
133 for key, value in dic.iteritems():
134 datalist[thread_id][key] = value
136 # delete from datalist if idx file does not exist.
137 for key in datalist.keys():
138 if key not in exist_key_set:
142 def _split_record(self, line_encoded):
143 line = line_encoded.decode(self.bbs_type.encoding, "replace")
144 m = self.bbs_type.subject_reg.match(line)
147 title = m.group("title")
149 res = int(m.group("res"))
152 return id, title, res
155 def _load_subjecttxt(self, func):
156 lastmod = self.load_board_idx()
158 lastmod = misc.httpdate_to_secs(lastmod)
162 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
164 for num, line_encoded \
165 in itertools.izip(itertools.count(1),
166 file(subjecttxt_path)):
167 result = self._split_record(line_encoded)
169 id, title, res = result
171 func(id, title, res, num, lastmod)
173 traceback.print_exc()
175 traceback.print_exc()
177 def _get_subjecttxt(self, func):
181 opener = urllib2.build_opener(HTTPRedirectHandler302, HTTPDebugHandler)
182 request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
183 request.add_header("User-agent", config.User_Agent)
185 response = opener.open(request)
186 except urllib2.HTTPError, e:
187 gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
188 print "switch to local"
189 self._load_subjecttxt(func)
190 except urllib2.URLError, e:
192 gobject.idle_add(self.set_status, str(e))
193 print "switch to local"
194 self._load_subjecttxt(func)
196 status = "%d %s" % (response.code, response.msg)
197 gobject.idle_add(self.set_status, status)
198 info = response.info()
201 if "Last-Modified" in info:
202 _lastmod = info["Last-Modified"]
203 self.save_board_idx(_lastmod)
205 lastmod = misc.httpdate_to_secs(_lastmod)
209 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
210 basedir = os.path.dirname(subjecttxt_path)
211 if not os.path.isdir(basedir):
215 f = file(subjecttxt_path, "w")
217 traceback.print_exc()
220 for num, line_encoded in itertools.izip(itertools.count(1),
224 f.write(line_encoded)
226 traceback.print_exc()
227 result = self._split_record(line_encoded)
229 id, title, res = result
231 func(id, title, res, num, lastmod)
233 traceback.print_exc()
235 traceback.print_exc()
241 def load_board_idx(self):
243 boardidxfile = misc.get_board_idx_path(self.bbs_type)
245 for line in file(boardidxfile):
246 if line.startswith("lastModified="):
247 lastmod = line[len("lastModified="):].rstrip("\n")
250 traceback.print_exc()
253 def save_board_idx(self, lastmod):
257 boardidx_path = misc.get_board_idx_path(self.bbs_type)
258 basedir = os.path.dirname(boardidx_path)
259 if not os.path.isdir(basedir):
262 f = file(boardidx_path, "w")
263 f.write("lastModified=" + lastmod + "\n")