1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 from StringIO import StringIO
32 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
33 import network_manager
35 BOARD_DATA_INVALID_VALUE = 0
class NothingToDoException(Exception):
    """Signal that a requested operation was not started.

    Raised in this file when a network request cannot be issued because
    ``network_manager`` reports that it is busy.

    Derives from ``Exception`` so that it is a proper exception type:
    classes that do not inherit from ``BaseException`` cannot be raised
    on Python 3, and raising old-style classes is deprecated on Python 2.
    """
41 def accumulate(iterable, initial_value=0):
42 sum_value = initial_value
43 for value in iterable:
47 def follow(iterable, under_value=0):
55 def __init__(self, bbs_type):
56 self.bbs_type = bbs_type
59 def set_status(self, text):
62 def set_fraction(self, fraction):
65 def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
73 # avoid the Last-Modified time of subject.txt and
74 # the build time of thread is equal (zero division)
79 average = round(res * 60 * 60 * 24.0 / dur, 2)
84 # already exists in datalist and num is not 0, then this thread
85 # is duplicate in subject.txt.
92 item["average"] = average
94 datalist[id] = {"id": id, "num": num, "title": title,
95 "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
96 "lastModified": 0, "average": average, "oldRes": 0}
98 def merge_local_subjecttxt(self, datalist):
99 for id, title, res, num, lastmod in self._load_subjecttxt():
100 self._merge_new_thread(datalist, id, title, res, num, lastmod)
102 status = "Complete subject file."
103 lastmod = self.load_board_idx()
105 self.lastmod = lastmod
106 status = "%s [%s]" % (status, lastmod)
107 self.set_status(status)
def merge_remote_subjecttxt(self, datalist, iterable):
    """Lazily merge subject records from *iterable* into *datalist*.

    Each element of *iterable* is unpacked as
    ``(id, title, res, num, lastmod)`` and passed, together with
    *datalist*, to ``self._merge_new_thread``.  The value returned by
    that call is yielded for every record.

    This is a generator: no record is merged until the caller iterates
    over the result.
    """
    for record in iterable:
        thread_id, title, res, num, lastmod = record
        yield self._merge_new_thread(
            datalist, thread_id, title, res, num, lastmod)
113 def _init_extra_data(self, dic):
120 def _progressing(self, iterable):
121 for before, fraction in follow(iterable):
122 if int(before*10) != int(fraction*10):
123 self.set_fraction(fraction)
126 def _modify_dict(self, item_dict):
127 # lastModified, httpdate to second
128 httpdate = item_dict["lastModified"]
130 secs = misc.httpdate_to_secs(httpdate)
132 item_dict["lastModified"] = 0
134 item_dict["lastModified"] = secs
137 def load_idxfiles(self, datalist):
139 for i in self._load_cache(datalist):
142 # the ".cache" file does not exist.
145 self.set_status("Complete load cache.")
147 for i in self._load_modified_idxfiles(datalist):
150 self.set_status("Complete load idx files.")
152 self._save_cache(datalist)
153 # do not wait to save
155 # adjustment after cache save, before load subject.txt
156 iterable = datalist.itervalues()
157 iterable = itertools.imap(self._modify_dict, iterable)
161 def _load_cache(self, datalist):
163 total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
167 iterable = file(misc.get_board_cache_path(self.bbs_type))
170 iterable_dic, iterable_line = itertools.tee(iterable)
172 iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
173 iterable_dic = cachefile.formatted_to_dict(iterable_dic)
175 iterable_line = itertools.imap(lambda x :len(x), iterable_line)
176 iterable_line = accumulate(iterable_line)
177 iterable_line = itertools.imap(
178 lambda value: float(value) / total / 5 * 2, iterable_line)
179 iterable_line = self._progressing(iterable_line)
182 iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
184 iterable = itertools.imap(self._init_extra_data, iterable)
187 datalist[dic["id"]] = dic
190 def _load_modified_idxfiles(self, datalist):
193 def id_and_lastmod(file_path):
194 thread_id = os.path.basename(file_path)[:len(ext)*-1]
196 idxlastModified = int(os.path.getmtime(file_path))
197 return thread_id, idxlastModified
201 def _do_new_thread(thread_id, idxlastModified):
202 print "new", thread_id
204 dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
205 dic["id"] = thread_id
206 dic["idxlastModified"] = idxlastModified
207 dic = self._init_extra_data(dic)
208 datalist[thread_id] = dic
209 return thread_id, idxlastModified
211 def _do_modified_thread(thread_id, idxlastModified):
212 print "modified", thread_id
214 datalist[thread_id]["idxlastModified"] = idxlastModified
215 dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
216 for key, value in dic.iteritems():
217 datalist[thread_id][key] = value
218 return thread_id, idxlastModified
220 def new_or_modified_thread(thread_id, idxlastModified):
221 if thread_id not in datalist:
222 return _do_new_thread(thread_id, idxlastModified)
223 elif idxlastModified > datalist[thread_id]["idxlastModified"]:
224 return _do_modified_thread(thread_id, idxlastModified)
225 return thread_id, idxlastModified
227 basedir = misc.get_thread_idx_dir_path(self.bbs_type)
229 filelist = glob.glob(os.path.join(basedir, "*"+ext))
230 total = len(filelist)
235 iterable, iterable_count = itertools.tee(iterable)
237 iterable_count = itertools.izip(itertools.count(1), iterable_count)
238 iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
239 iterable_count = itertools.imap(
240 lambda x: float(x)/total/10 + 0.4, iterable_count)
241 iterable_count = self._progressing(iterable_count)
244 iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)
246 iterable = itertools.imap(id_and_lastmod, iterable)
247 iterable = itertools.ifilter(None, iterable)
248 iterable = itertools.starmap(new_or_modified_thread, iterable)
250 exist_key_set = set()
251 iterable = itertools.starmap(lambda x, y: exist_key_set.add(x),
257 # delete from datalist if idx file does not exist.
258 datalist_key_set = frozenset(datalist.iterkeys())
259 delete_key_set = datalist_key_set - exist_key_set
260 for key in delete_key_set:
def _save_cache(self, datalist):
    """Write the contents of *datalist* to this board's cache file.

    The ``(key, value)`` pairs of *datalist* are converted by
    ``cachefile.dict_to_formatted`` and the result is handed to
    ``misc.chain`` together with the ``write`` and ``close`` methods of
    a ``misc.FileWrap`` opened in ``"w"`` mode on the path returned by
    ``misc.get_board_cache_path(self.bbs_type)``.

    NOTE(review): presumably ``misc.chain`` passes each formatted item
    to ``write`` and calls ``close`` at the end, possibly without
    blocking the caller -- confirm against ``misc.chain``.
    """
    formatted = cachefile.dict_to_formatted(datalist.items())
    cache_path = misc.get_board_cache_path(self.bbs_type)
    sink = misc.FileWrap(cache_path, "w")
    misc.chain(sink.write, sink.close, formatted)
271 def _split_record(self, line_encoded):
272 line = line_encoded.decode(self.bbs_type.encoding, "replace")
273 m = self.bbs_type.subject_reg.match(line)
276 title = m.group("title")
278 res = int(m.group("res"))
281 return id, title, res
284 def _load_subjecttxt(self):
285 lastmod = self.load_board_idx()
287 lastmod = misc.httpdate_to_secs(lastmod)
291 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
293 total = os.path.getsize(subjecttxt_path)
297 iterable = file(subjecttxt_path)
300 iterable, iterable_len = itertools.tee(iterable)
302 iterable_len = itertools.imap(lambda l: len(l), iterable_len)
303 iterable_len = accumulate(iterable_len)
304 iterable_len = itertools.imap(
305 lambda value: float(value) / total / 2 + 0.5, iterable_len)
306 iterable_len = self._progressing(iterable_len)
309 iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
311 iterable = itertools.izip(itertools.count(1), iterable)
314 for num, line_encoded in iterable:
315 result = self._split_record(line_encoded)
317 id, title, res = result
318 yield id, title, res, num, lastmod
320 return main_process()
322 def get_subjecttxt(self, on_received):
323 uri = self.bbs_type.get_subject_txt_uri()
324 request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
325 request.add_header("User-agent", config.User_Agent)
327 request.add_header("If-modified-since", self.lastmod)
330 network_manager.request_get(uri, request.headers, on_received)
331 except network_manager.BusyException:
332 self.set_status("The network is busy. Try later.")
333 raise NothingToDoException()
335 self.set_status("GET...")
337 def progress_response(self, response):
338 status = response.status
339 headers = response.headers
340 message = StringIO(response.message)
342 if "last-modified".capitalize() in headers:
343 self.set_status("%s [%s]" % (status,
344 headers["last-modified".capitalize()]))
346 self.set_status("%s" % status)
348 version, code, msg = status.split(None, 2)
351 raise misc.StopChainException()
354 if "last-modified".capitalize() in headers:
355 _lastmod = headers["last-modified".capitalize()]
356 self.lastmod = _lastmod
357 self.save_board_idx(_lastmod)
359 lastmod = misc.httpdate_to_secs(_lastmod)
363 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
364 f = misc.FileWrap(subjecttxt_path, "w")
367 total = int(headers["content-length".capitalize()])
371 def saving(line_encoded):
373 f.write(line_encoded)
375 traceback.print_exc()
381 iterable, iterable_len = itertools.tee(iterable)
383 iterable_len = itertools.imap(lambda l: len(l), iterable_len)
384 iterable_len = accumulate(iterable_len)
385 iterable_len = itertools.imap(
386 lambda value: float(value) / total, iterable_len)
387 iterable_len = self._progressing(iterable_len)
390 iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
392 iterable = itertools.imap(saving, iterable)
393 iterable = itertools.izip(itertools.count(1), iterable)
395 for num, line_encoded in iterable:
396 result = self._split_record(line_encoded)
398 id, title, res = result
399 yield id, title, res, num, lastmod
402 def load_board_idx(self):
404 boardidxfile = misc.get_board_idx_path(self.bbs_type)
406 for line in file(boardidxfile):
407 if line.startswith("lastModified="):
408 lastmod = line[len("lastModified="):].rstrip("\n")
411 traceback.print_exc()
414 def save_board_idx(self, lastmod):
418 boardidx_path = misc.get_board_idx_path(self.bbs_type)
419 basedir = os.path.dirname(boardidx_path)
420 if not os.path.isdir(basedir):
423 f = file(boardidx_path, "w")
424 f.write("lastModified=" + lastmod + "\n")