1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
# Placeholder value.  In this file it is stored as "lineCount" when
# _merge_new_thread creates a fresh thread entry.
33 BOARD_DATA_INVALID_VALUE = 0
# accumulate(iterable, initial_value=0)
#
# Starts a running value at ``initial_value`` and loops over ``iterable``.
# NOTE(review): the embedded listing numbers jump from 37 to 41, so the loop
# body is missing from this copy.  Callers in this file feed it per-line
# lengths and divide what comes out by a total size, so it presumably yields
# running totals -- confirm against the complete file.
35 def accumulate(iterable, initial_value=0):
# Seed the running value with the caller-supplied start.
36 sum_value = initial_value
37 for value in iterable:
# follow(iterable, under_value=0)
#
# NOTE(review): only the signature survives in this copy (the embedded
# listing numbers jump from 41 to 49).  _progressing unpacks every item this
# produces as ``before, fraction``, so it presumably pairs each element with
# its predecessor, with ``under_value`` standing in before the first one --
# confirm against the complete file.
41 def follow(iterable, under_value=0):
# Constructor: keep the ``bbs_type`` object on the instance.  The other
# methods in this file hand ``self.bbs_type`` to the ``misc`` path helpers and
# use its ``encoding``, ``subject_reg``, ``get_subject_txt_uri()`` and
# ``clone_with_thread()`` members.
49 def __init__(self, bbs_type):
50 self.bbs_type = bbs_type
# set_status(text): receives a status string.  Other methods in this file
# schedule it with ``gobject.idle_add(self.set_status, ...)``.
# NOTE(review): the body is not present in this copy of the file.
52 def set_status(self, text):
# set_fraction(fraction): receives a progress fraction.  _progressing calls
# it between ``gtk.gdk.threads_enter()`` and ``gtk.gdk.threads_leave()``.
# NOTE(review): the body is not present in this copy of the file.
55 def set_fraction(self, fraction):
# _merge_new_thread: fold one subject.txt record into ``datalist``.
#
# Arguments, as passed by merge_local_subjecttxt / merge_remote_subjecttxt:
#   datalist -- mapping indexed by thread id; entries are dicts
#   id, title, res -- the values _split_record returns for one line
#   num -- 1-based position of the record (produced by itertools.count(1))
#   lastmod -- the value the record generators yield alongside each record
#
# NOTE(review): the embedded listing numbers skip repeatedly inside this
# method, so the definition of ``dur``, the lookup of ``item`` and the
# branching between the update and the insert below are not visible here.
58 def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
66 # avoid the Last-Modified time of subject.txt and
67 # the build time of thread is equal (zero division)
# ``res`` scaled by 60*60*24 and divided by ``dur``, rounded to two decimals.
# Presumably "responses per day" with ``dur`` in seconds -- TODO confirm.
72 average = round(res * 60 * 60 * 24.0 / dur, 2)
77 # already exists in datalist and num is not 0, then this thread
78 # is duplicate in subject.txt.
# Refresh the average on ``item`` (presumably the entry already in datalist).
85 item["average"] = average
# Store a brand-new entry under ``id``: "lineCount" gets the invalid-value
# placeholder, while "lastModified" and "oldRes" start at 0.
87 datalist[id] = {"id": id, "num": num, "title": title,
88 "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
89 "lastModified": 0, "average": average, "oldRes": 0}
# merge_local_subjecttxt: merge every record produced by _load_subjecttxt()
# (the locally stored subject.txt) into ``datalist``, then queue a completion
# message for the status display.
# NOTE(review): lines are missing around the loop in this copy (embedded
# numbers 92 and 95-99), so any error handling there is not visible.
91 def merge_local_subjecttxt(self, datalist):
93 for id, title, res, num, lastmod in self._load_subjecttxt():
94 self._merge_new_thread(datalist, id, title, res, num, lastmod)
100 status = "Complete subject file."
# set_status is handed to gobject.idle_add rather than called directly.
101 gobject.idle_add(self.set_status, status)
# merge_remote_subjecttxt: same merge as merge_local_subjecttxt, but the
# records come from _get_subjecttxt(), which downloads subject.txt and falls
# back to the local copy when the request fails.
# NOTE(review): the lines that enclose the loop and the print_exc() call are
# missing from this copy; presumably a try/except -- confirm.
103 def merge_remote_subjecttxt(self, datalist):
105 for id, title, res, num, lastmod in self._get_subjecttxt():
106 self._merge_new_thread(datalist, id, title, res, num, lastmod)
# Print the current exception's traceback.
110 traceback.print_exc()
# _init_extra_data(dic): takes one thread dict.  Callers in this file use the
# return value as the dict from then on (_load_cache maps it over every cached
# entry; _do_new_thread assigns the result back to ``dic``).
# NOTE(review): the body is not present in this copy of the file.
112 def _init_extra_data(self, dic):
# _progressing: wrap an iterable of progress fractions and push selected
# values to set_fraction as the iterable is consumed.
#
# NOTE(review): lines are missing between the visible statements (embedded
# numbers 123, 125, 127), so what this generator yields and how the
# enter/leave pair is protected are not visible -- confirm against the
# complete file.
119 def _progressing(self, iterable):
# follow() supplies each fraction together with a ``before`` value.
120 for before, fraction in follow(iterable):
# Only react when the value moves into a different tenth (int(x*10) changes),
# so the update below does not run for every single item.
121 if int(before*10) != int(fraction*10):
# The set_fraction call sits between gtk.gdk.threads_enter() and
# gtk.gdk.threads_leave().
122 gtk.gdk.threads_enter()
124 self.set_fraction(fraction)
126 gtk.gdk.threads_leave()
# _modify_dict: rewrite ``item_dict["lastModified"]`` in place.  The stored
# value is passed to misc.httpdate_to_secs and the result is written back;
# a separate assignment writes 0 instead.
# NOTE(review): the lines that choose between the two assignments are missing
# from this copy (embedded numbers 132, 134, 136), as is any return value.
129 def _modify_dict(self, item_dict):
130 # lastModified, httpdate to second
131 httpdate = item_dict["lastModified"]
133 secs = misc.httpdate_to_secs(httpdate)
# Fallback value.
135 item_dict["lastModified"] = 0
# Converted value.
137 item_dict["lastModified"] = secs
# load_idxfiles: build the thread table from the board cache and the idx
# files on disk.
#
# Visible steps: load the cache, bring it up to date from the idx files,
# write the cache back, convert each entry's "lastModified" via _modify_dict,
# and queue a completion message.
# NOTE(review): lines are missing between the steps in this copy, so the
# try/except structure around print_exc() and the return value are not
# visible.
140 def load_idxfiles(self):
143 datalist = self._load_cache()
147 self._load_modified_idxfiles(datalist)
150 self._save_cache(datalist)
152 traceback.print_exc()
154 # adjustment after cache save, before load subject.txt
155 iterable = datalist.itervalues()
156 iterable = itertools.imap(self._modify_dict, iterable)
# imap is lazy, so this loop exists only to drain it and make _modify_dict
# run on every entry; ``-1`` is a throwaway expression used as the loop body.
157 for i in iterable: -1
159 status = "Complete index files."
160 gobject.idle_add(self.set_status, status)
# _load_cache: read the board cache file and return a dict that maps each
# entry's "id" to the entry dict, reporting progress while reading.
#
# NOTE(review): lines are missing between the visible statements in this copy,
# so any handling of a missing or unreadable cache file is not visible here.
163 def _load_cache(self):
# Size of the cache file; the denominator for the progress fraction.
165 total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
169 iterable = file(misc.get_board_cache_path(self.bbs_type))
# Split the line iterator in two: one branch is parsed, the other only
# measures how much has been read.
172 iterable_dic, iterable_line = itertools.tee(iterable)
# Parsing branch: strip trailing whitespace, then let cachefile turn the
# lines into dicts.
174 iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
175 iterable_dic = cachefile.formatted_to_dict(iterable_dic)
# Progress branch: line lengths -> accumulate() -> value / total * 2/5, then
# routed through _progressing.  If accumulate() yields running totals, this
# stage ends at 0.4 -- TODO confirm.
177 iterable_line = itertools.imap(lambda x :len(x), iterable_line)
178 iterable_line = accumulate(iterable_line)
179 iterable_line = itertools.imap(
180 lambda value: float(value) / total / 5 * 2, iterable_line)
181 iterable_line = self._progressing(iterable_line)
# Advance both branches in lockstep; only the parsed dict is kept.
184 iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
186 iterable = itertools.imap(self._init_extra_data, iterable)
188 return dict([(dic["id"], dic) for dic in iterable])
# _load_modified_idxfiles: bring ``datalist`` in line with the idx files found
# in the board's thread-idx directory.  Threads with an idx file but no entry
# are added, entries older than their idx file are reloaded, and keys without
# an idx file are collected for removal.
#
# NOTE(review): several lines are missing from this copy: the definition of
# ``ext``, the statement that first binds ``iterable`` (presumably from
# ``filelist``), and the body of the final delete loop.
190 def _load_modified_idxfiles(self, datalist):
# Helper: idx file path -> (thread id, modification time as int).
193 def id_and_lastmod(file_path):
# Cut the trailing len(ext) characters off the file name.
194 thread_id = os.path.basename(file_path)[:len(ext)*-1]
196 idxlastModified = int(os.path.getmtime(file_path))
197 return thread_id, idxlastModified
# Helper: load the idx data for a thread with no entry yet and insert it.
201 def _do_new_thread(thread_id, idxlastModified):
202 print "new", thread_id
204 dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
205 dic["id"] = thread_id
206 dic["idxlastModified"] = idxlastModified
207 dic = self._init_extra_data(dic)
208 datalist[thread_id] = dic
209 return thread_id, idxlastModified
# Helper: reload the idx data and copy every key over the existing entry.
211 def _do_modified_thread(thread_id, idxlastModified):
212 print "modified", thread_id
214 datalist[thread_id]["idxlastModified"] = idxlastModified
215 dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
216 for key, value in dic.iteritems():
217 datalist[thread_id][key] = value
218 return thread_id, idxlastModified
# Helper: dispatch on whether the thread is unknown, stale or current.  All
# three paths return the same (thread_id, idxlastModified) pair.
220 def new_or_modified_thread(thread_id, idxlastModified):
221 if thread_id not in datalist:
222 return _do_new_thread(thread_id, idxlastModified)
223 elif idxlastModified > datalist[thread_id]["idxlastModified"]:
224 return _do_modified_thread(thread_id, idxlastModified)
225 return thread_id, idxlastModified
227 basedir = misc.get_thread_idx_dir_path(self.bbs_type)
# Every file in the idx directory whose name ends with ``ext``.
229 filelist = glob.glob(os.path.join(basedir, "*"+ext))
230 total = len(filelist)
# One branch carries the items, the other is turned into a progress counter.
235 iterable, iterable_count = itertools.tee(iterable)
# Number the items from 1 and keep only the number.
237 iterable_count = itertools.izip(itertools.count(1), iterable_count)
238 iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
# count / total / 10 + 0.4: this stage spans 0.4 to 0.5 of the progress
# range.
239 iterable_count = itertools.imap(
240 lambda x: float(x)/total/10 + 0.4, iterable_count)
241 iterable_count = self._progressing(iterable_count)
# Advance both branches in lockstep; only the item is kept.
244 iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)
246 iterable = itertools.imap(id_and_lastmod, iterable)
# Drop falsy results before unpacking.  NOTE(review): the visible part of
# id_and_lastmod always returns a 2-tuple; a falsy return may exist on a line
# missing from this copy.
247 iterable = itertools.ifilter(None, iterable)
248 iterable = itertools.starmap(new_or_modified_thread, iterable)
# Consuming the pipeline here is what performs the loads and updates above.
249 exist_key_set = frozenset([x for x, y in iterable])
251 # delete from datalist if idx file does not exist.
252 datalist_key_set = frozenset(datalist.iterkeys())
253 delete_key_set = datalist_key_set - exist_key_set
254 for key in delete_key_set:
# _save_cache: serialise ``datalist`` with cachefile.dict_to_formatted and
# write the resulting lines to the board cache path through misc.FileWrap,
# opened with mode "w".
# NOTE(review): no explicit close is visible in this copy; whether FileWrap
# needs one cannot be told from here.
258 def _save_cache(self, datalist):
259 iterable = datalist.iteritems()
260 iterable = cachefile.dict_to_formatted(iterable)
261 c_file = misc.FileWrap(misc.get_board_cache_path(self.bbs_type), "w")
262 c_file.writelines(iterable)
# _split_record: parse one raw subject.txt line into (id, title, res).
#
# NOTE(review): lines are missing from this copy, including the one that
# binds ``id`` and whatever happens when the pattern does not match.
264 def _split_record(self, line_encoded):
# Decode with the board's encoding; undecodable bytes are replaced rather
# than raising.
265 line = line_encoded.decode(self.bbs_type.encoding, "replace")
266 m = self.bbs_type.subject_reg.match(line)
269 title = m.group("title")
# ``res`` is converted to an int.
271 res = int(m.group("res"))
274 return id, title, res
# _load_subjecttxt: read the locally stored subject.txt and produce
# (id, title, res, num, lastmod) tuples, reporting progress on the way.
#
# NOTE(review): lines are missing from this copy.  The ``yield`` loop together
# with ``return main_process()`` indicates the loop lives in a nested
# generator named main_process whose ``def`` line is not shown; the check
# between _split_record and the unpacking is not shown either.
277 def _load_subjecttxt(self):
# Stored Last-Modified value from the board idx file, converted to seconds.
278 lastmod = self.load_board_idx()
280 lastmod = misc.httpdate_to_secs(lastmod)
284 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
# File size; the denominator for the progress fraction.
286 total = os.path.getsize(subjecttxt_path)
290 iterable = file(subjecttxt_path)
# One branch carries the lines, the other measures progress.
293 iterable, iterable_len = itertools.tee(iterable)
# Line lengths -> accumulate() -> value / total / 2 + 0.5, then routed
# through _progressing: this stage is mapped onto the upper half of the
# progress range.
295 iterable_len = itertools.imap(lambda l: len(l), iterable_len)
296 iterable_len = accumulate(iterable_len)
297 iterable_len = itertools.imap(
298 lambda value: float(value) / total / 2 + 0.5, iterable_len)
299 iterable_len = self._progressing(iterable_len)
# Advance both branches in lockstep; only the line is kept.
302 iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
# Number the lines starting at 1.
304 iterable = itertools.izip(itertools.count(1), iterable)
307 for num, line_encoded in iterable:
308 result = self._split_record(line_encoded)
310 id, title, res = result
311 yield id, title, res, num, lastmod
313 return main_process()
# _get_subjecttxt: download subject.txt and produce
# (id, title, res, num, lastmod) tuples while also writing the downloaded
# lines to the local subject.txt path.  When the request raises HTTPError or
# URLError, the locally stored copy is used instead.
#
# NOTE(review): many lines are missing from this copy: the ``try:`` for the
# request, how ``lastmod`` is set when no Last-Modified header is present,
# the statement that first binds ``iterable``, most of the ``saving`` helper,
# and the ``def`` line of main_process.
315 def _get_subjecttxt(self):
# Opener built with the two handlers imported from http_sub.
319 opener = urllib2.build_opener(HTTPRedirectHandler302, HTTPDebugHandler)
320 request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
321 request.add_header("User-agent", config.User_Agent)
323 response = opener.open(request)
324 except urllib2.HTTPError, e:
# Show the HTTP status, then fall back to the local file.
325 gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
326 print "switch to local"
327 return self._load_subjecttxt()
328 except urllib2.URLError, e:
# Same fallback for URLError.
330 gobject.idle_add(self.set_status, str(e))
331 print "switch to local"
332 return self._load_subjecttxt()
# Request succeeded: show the response status.
334 status = "%d %s" % (response.code, response.msg)
335 gobject.idle_add(self.set_status, status)
336 info = response.info()
# Persist the server's Last-Modified header and convert it to seconds.
339 if "Last-Modified" in info:
340 _lastmod = info["Last-Modified"]
341 self.save_board_idx(_lastmod)
343 lastmod = misc.httpdate_to_secs(_lastmod)
347 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
348 f = misc.FileWrap(subjecttxt_path, "w")
# Content-Length is the denominator for the progress fraction.
351 total = int(info["Content-Length"])
# Helper mapped over every downloaded line: writes the raw line to the local
# file.  Its output is later unpacked as ``line_encoded``, so presumably it
# returns the line unchanged -- TODO confirm, the return is not shown.
355 def saving(line_encoded):
357 f.write(line_encoded)
359 traceback.print_exc()
# One branch carries the lines, the other measures progress.
365 iterable, iterable_len = itertools.tee(iterable)
# Line lengths -> accumulate() -> value / total, then routed through
# _progressing.
367 iterable_len = itertools.imap(lambda l: len(l), iterable_len)
368 iterable_len = accumulate(iterable_len)
369 iterable_len = itertools.imap(
370 lambda value: float(value) / total, iterable_len)
371 iterable_len = self._progressing(iterable_len)
# Advance both branches in lockstep; only the line is kept.
374 iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
376 iterable = itertools.imap(saving, iterable)
# Number the lines starting at 1.
377 iterable = itertools.izip(itertools.count(1), iterable)
380 for num, line_encoded in iterable:
381 result = self._split_record(line_encoded)
383 id, title, res = result
384 yield id, title, res, num, lastmod
386 return main_process()
# load_board_idx: scan the board idx file for a line that starts with
# "lastModified=" and take the text after that prefix, minus the trailing
# newline.  _load_subjecttxt uses this method's return value as the stored
# Last-Modified value.
# NOTE(review): the initial value of ``lastmod``, the try/except around the
# read and the return statement are missing from this copy.
388 def load_board_idx(self):
390 boardidxfile = misc.get_board_idx_path(self.bbs_type)
392 for line in file(boardidxfile):
393 if line.startswith("lastModified="):
394 lastmod = line[len("lastModified="):].rstrip("\n")
397 traceback.print_exc()
# save_board_idx: write ``lastmod`` to the board idx file as a single
# "lastModified=<value>" line.  The format matches what load_board_idx looks
# for.
# NOTE(review): lines are missing from this copy (the body of the directory
# check -- presumably it creates ``basedir`` -- and anything after the
# write, such as closing the file).  The method may also continue past the
# end of this excerpt.
400 def save_board_idx(self, lastmod):
404 boardidx_path = misc.get_board_idx_path(self.bbs_type)
405 basedir = os.path.dirname(boardidx_path)
406 if not os.path.isdir(basedir):
409 f = file(boardidx_path, "w")
410 f.write("lastModified=" + lastmod + "\n")