1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
# Placeholder value; _merge_new_thread stores it under "lineCount" when it
# creates a new thread entry.
33 BOARD_DATA_INVALID_VALUE = 0
# Running-total helper: starts from `initial_value` and walks `iterable`.
# NOTE(review): the loop body is not visible in this excerpt.  Callers feed
# the result to itertools.imap, so it presumably yields each partial sum --
# confirm against the full file.
35 def accumulate(iterable, initial_value=0):
36 sum_value = initial_value
37 for value in iterable:
# NOTE(review): only the signature is visible in this excerpt.
# _progressing unpacks every item produced here as a (before, fraction)
# pair, so this presumably pairs each element with its predecessor, using
# `under_value` before the first element -- confirm against the full body.
41 def follow(iterable, under_value=0):
def __init__(self, bbs_type):
    """Remember the board descriptor that every other method works on.

    `bbs_type` is stored as-is; other methods of this class read its
    `encoding` and `subject_reg` attributes and call its
    `clone_with_thread()` and `get_subject_txt_uri()` methods.
    """
    self.bbs_type = bbs_type
# Status-text hook.  Other methods of this class schedule it through
# gobject.idle_add with a message string.
# NOTE(review): the body is not visible in this excerpt -- presumably a
# no-op intended to be overridden; confirm.
52 def set_status(self, text):
# Progress hook.  _progressing calls it with the current progress fraction.
# NOTE(review): the body is not visible in this excerpt -- presumably a
# no-op intended to be overridden; confirm.
55 def set_fraction(self, fraction):
# Merge one subject.txt record into `datalist`, a dict keyed by thread id.
# The visible lines compute an "average", store it on an existing `item`,
# and insert a fresh entry whose "lineCount" is BOARD_DATA_INVALID_VALUE and
# whose "lastModified" is an empty string.
# NOTE(review): the computation of `dur`, the lookup of `item` and the
# branching between "update" and "insert" are not visible in this excerpt.
58 def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
66 # Guard against the Last-Modified time of subject.txt being equal to
67 # the build time of the thread (that would cause a zero division).
# res * 86400 / dur, rounded to two decimals.  Presumably "responses per
# day" with `dur` in seconds -- confirm where `dur` is computed.
72 average = round(res * 60 * 60 * 24.0 / dur, 2)
77 # already exists in datalist and num is not 0, then this thread
78 # is duplicate in subject.txt.
85 item["average"] = average
87 datalist[id] = {"id": id, "num": num, "title": title,
88 "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
89 "lastModified": "", "average": average}
def merge_local_subjecttxt(self, datalist):
    """Merge the locally stored subject.txt into `datalist`.

    Every record produced by `_load_subjecttxt` is handed to
    `_merge_new_thread`; once all records are consumed a completion
    message is queued for `set_status` via `gobject.idle_add`.
    """
    for record in self._load_subjecttxt():
        thread_id, title, res, num, lastmod = record
        self._merge_new_thread(
            datalist, thread_id, title, res, num, lastmod)

    gobject.idle_add(self.set_status, "Complete subject file.")
def merge_remote_subjecttxt(self, datalist):
    """Merge the subject.txt obtained by `_get_subjecttxt` into `datalist`.

    Every record produced by `_get_subjecttxt` is handed to
    `_merge_new_thread` unchanged.
    """
    for record in self._get_subjecttxt():
        thread_id, title, res, num, lastmod = record
        self._merge_new_thread(
            datalist, thread_id, title, res, num, lastmod)
# NOTE(review): only the signature is visible in this excerpt.  Callers
# rebind the return value and then index it with "id", so it presumably
# returns the (possibly augmented) dict -- confirm against the full body.
106 def _init_extra_data(self, dic):
# Walk a stream of progress fractions and report them through set_fraction.
# NOTE(review): several lines are missing from this excerpt, so it is not
# visible whether set_fraction is called only inside the `if`, nor what is
# yielded.  Callers consume the result as an iterable, so it presumably
# yields each fraction -- confirm.
112 def _progressing(self, iterable):
113 for before, fraction in follow(iterable):
# True when the previous and current fraction fall into different tenths.
114 if int(before*10) != int(fraction*10):
117 self.set_fraction(fraction)
# Build the thread table: load the board cache, apply the idx files on
# disk, write the refreshed table back to the cache and queue a completion
# message for set_status.
# NOTE(review): the end of this method is not visible in this excerpt
# (presumably it returns `datalist`) -- confirm.
122 def load_idxfiles(self):
124 datalist = self._load_cache()
# Mutates `datalist` in place (adds, updates and prunes entries).
126 self._load_modified_idxfiles(datalist)
128 cachefile.save_cache(self.bbs_type, datalist)
130 status = "Complete index files."
131 gobject.idle_add(self.set_status, status)
# Load the board cache into a dict keyed by each entry's "id", reporting
# progress through _progressing while the cache is consumed.
# NOTE(review): lines are missing from this excerpt; any error handling
# around getsize (missing or empty cache file) is not visible.
134 def _load_cache(self):
# Size of the cache file on disk; the denominator of the progress fraction.
136 total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
# Every item is treated below as a 2-tuple: the first element is later
# indexed with "id", the second has len() taken of it.
140 iterable = cachefile.load_cache(self.bbs_type)
# Duplicate the stream so one copy can carry the first elements and the
# other can drive the progress computation from the second elements.
143 iterable_dic, iterable_line = itertools.tee(iterable)
144 iterable_dic = itertools.starmap(lambda x, y: x, iterable_dic)
145 iterable_line = itertools.starmap(lambda x, y: y, iterable_line)
# length of each second element -> cumulative -> scaled by 2/5 of the
# cumulative/total ratio.
147 iterable_line = itertools.imap(lambda x :len(x), iterable_line)
148 iterable_line = accumulate(iterable_line)
149 iterable_line = itertools.imap(
150 lambda value: float(value) / total / 5 * 2, iterable_line)
151 iterable_line = self._progressing(iterable_line)
# Two-iterable imap advances both streams in lock-step; only the first
# element is kept, the progress stream is consumed for its side effect.
154 iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
156 iterable = itertools.imap(self._init_extra_data, iterable)
158 return dict([(dic["id"], dic) for dic in iterable])
# Bring `datalist` in line with the idx files found on disk: entries for
# unknown thread ids are added, entries whose idx file is newer than the
# recorded "idxlastModified" are refreshed, and ids without an idx file are
# collected for removal.  Progress is reported through _progressing.
# NOTE(review): lines are missing from this excerpt -- the definition of
# `ext`, the initial value of `iterable` and the body of the final delete
# loop are not visible.
160 def _load_modified_idxfiles(self, datalist):
# Map an idx file path to (thread id, file mtime).  The thread id is the
# basename with its last len(ext) characters removed.
163 def id_and_lastmod(file_path):
164 thread_id = os.path.basename(file_path)[:len(ext)*-1]
166 idxlastModified = os.path.getmtime(file_path)
167 return thread_id, idxlastModified
# Thread id not yet in datalist: load its idx file and insert a new entry.
171 def _do_new_thread(thread_id, idxlastModified):
172 print "new", thread_id
174 dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
175 dic["id"] = thread_id
176 dic["idxlastModified"] = idxlastModified
177 dic = self._init_extra_data(dic)
178 datalist[thread_id] = dic
179 return thread_id, idxlastModified
# Known thread whose idx file is newer: overwrite the stored keys with the
# freshly loaded ones and record the new mtime.
181 def _do_modified_thread(thread_id, idxlastModified):
182 print "modified", thread_id
184 datalist[thread_id]["idxlastModified"] = idxlastModified
185 dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
186 for key, value in dic.iteritems():
187 datalist[thread_id][key] = value
188 return thread_id, idxlastModified
# Dispatch to the two helpers above; unchanged threads pass straight through.
190 def new_or_modified_thread(thread_id, idxlastModified):
191 if thread_id not in datalist:
192 return _do_new_thread(thread_id, idxlastModified)
193 elif idxlastModified > datalist[thread_id]["idxlastModified"]:
194 return _do_modified_thread(thread_id, idxlastModified)
195 return thread_id, idxlastModified
197 basedir = misc.get_thread_idx_dir_path(self.bbs_type)
199 filelist = glob.glob(os.path.join(basedir, "*"+ext))
200 total = len(filelist)
# NOTE(review): `iterable` is assigned in lines not shown (presumably from
# `filelist`) -- confirm.
205 iterable, iterable_count = itertools.tee(iterable)
# 1-based position of each file -> position/total/10 + 0.4.
207 iterable_count = itertools.izip(itertools.count(1), iterable_count)
208 iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
209 iterable_count = itertools.imap(
210 lambda x: float(x)/total/10 + 0.4, iterable_count)
211 iterable_count = self._progressing(iterable_count)
# Advance the progress stream in lock-step while passing the paths through.
214 iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)
216 iterable = itertools.imap(id_and_lastmod, iterable)
# Drops falsy items.  NOTE(review): the visible id_and_lastmod always
# returns a tuple; lines not shown may return None -- confirm.
217 iterable = itertools.ifilter(None, iterable)
218 iterable = itertools.starmap(new_or_modified_thread, iterable)
# Consuming the pipeline here is what actually performs the updates.
219 exist_key_set = frozenset([x for x, y in iterable])
221 # delete from datalist if idx file does not exist.
222 datalist_key_set = frozenset(datalist.iterkeys())
223 delete_key_set = datalist_key_set - exist_key_set
224 for key in delete_key_set:
# Parse one raw subject.txt line into (id, title, res).
# NOTE(review): lines are missing from this excerpt -- the check for a
# failed regex match and the assignment of `id` are not visible.
228 def _split_record(self, line_encoded):
# Undecodable bytes are substituted rather than raising ("replace").
229 line = line_encoded.decode(self.bbs_type.encoding, "replace")
230 m = self.bbs_type.subject_reg.match(line)
233 title = m.group("title")
235 res = int(m.group("res"))
238 return id, title, res
# Generator over the locally stored subject.txt: yields
# (id, title, res, num, lastmod) per parsed line, where `num` is the
# 1-based line position and `lastmod` comes from the board idx file.
# Progress is reported through _progressing while lines are consumed.
# NOTE(review): lines are missing from this excerpt (error handling, the
# check of `result` before unpacking, closing of the file) -- confirm.
241 def _load_subjecttxt(self):
242 lastmod = self.load_board_idx()
244 lastmod = misc.httpdate_to_secs(lastmod)
248 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
# File size is the denominator of the progress fraction.
250 total = os.path.getsize(subjecttxt_path)
# Iterating the file object yields raw (still encoded) lines.
254 iterable = file(subjecttxt_path)
257 iterable, iterable_len = itertools.tee(iterable)
# line length -> cumulative -> cumulative/total/2 + 0.5.
259 iterable_len = itertools.imap(lambda l: len(l), iterable_len)
260 iterable_len = accumulate(iterable_len)
261 iterable_len = itertools.imap(
262 lambda value: float(value) / total / 2 + 0.5, iterable_len)
263 iterable_len = self._progressing(iterable_len)
# Advance the progress stream in lock-step while passing the lines through.
266 iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
268 for num, line_encoded in itertools.izip(itertools.count(1), iterable):
269 result = self._split_record(line_encoded)
271 id, title, res = result
272 yield id, title, res, num, lastmod
# Generator that requests subject.txt from the URI given by
# bbs_type.get_subject_txt_uri(), writes each received line to the local
# subject.txt path and yields (id, title, res, num, lastmod) per parsed
# line.  Progress is reported through _progressing.
# NOTE(review): lines are missing from this excerpt (the `try:`/`else:`
# keywords, the source of `iterable`, the default for `lastmod`, the check
# of `result`, closing of `f`) -- confirm against the full file.
274 def _get_subjecttxt(self):
278 opener = urllib2.build_opener(HTTPRedirectHandler302, HTTPDebugHandler)
279 request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
280 request.add_header("User-agent", config.User_Agent)
282 response = opener.open(request)
283 except urllib2.HTTPError, e:
284 gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
285 print "switch to local"
# NOTE(review): `_load_subjecttxt` is defined with no parameter besides
# `self`, `func` is not defined in the visible code, and the return value is
# discarded -- this fallback looks broken; confirm and fix in the full file.
286 self._load_subjecttxt(func)
287 except urllib2.URLError, e:
289 gobject.idle_add(self.set_status, str(e))
290 print "switch to local"
# NOTE(review): same concern as the HTTPError fallback above.
291 self._load_subjecttxt(func)
293 status = "%d %s" % (response.code, response.msg)
294 gobject.idle_add(self.set_status, status)
295 info = response.info()
# Persist the server's Last-Modified header and convert it to seconds.
298 if "Last-Modified" in info:
299 _lastmod = info["Last-Modified"]
300 self.save_board_idx(_lastmod)
302 lastmod = misc.httpdate_to_secs(_lastmod)
306 subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
307 f = misc.FileWrap(subjecttxt_path, "w")
# NOTE(review): raises KeyError if the response has no Content-Length
# header, unless that is handled in lines not shown -- confirm.
310 total = int(info["Content-Length"])
# Pipeline stage that writes each raw line to the local file.
# NOTE(review): its try/except and return statement are not visible here.
314 def saving(line_encoded):
316 f.write(line_encoded)
318 traceback.print_exc()
324 iterable, iterable_len = itertools.tee(iterable)
# line length -> cumulative -> cumulative/total/2 + 0.5.
326 iterable_len = itertools.imap(lambda l: len(l), iterable_len)
327 iterable_len = accumulate(iterable_len)
328 iterable_len = itertools.imap(
329 lambda value: float(value) / total / 2 + 0.5, iterable_len)
330 iterable_len = self._progressing(iterable_len)
# Advance the progress stream in lock-step while passing the lines through.
333 iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
335 iterable = itertools.imap(saving, iterable)
# Attach a 1-based line number to every item.
336 iterable = itertools.izip(itertools.count(1), iterable)
338 for num, line_encoded in iterable:
339 result = self._split_record(line_encoded)
341 id, title, res = result
342 yield id, title, res, num, lastmod
# Read the board idx file and extract the text following "lastModified="
# (trailing newline removed).
# NOTE(review): lines are missing from this excerpt -- the initial value of
# `lastmod`, the try/except around the read and the return statement are not
# visible; the file object is not explicitly closed in the visible lines.
346 def load_board_idx(self):
348 boardidxfile = misc.get_board_idx_path(self.bbs_type)
350 for line in file(boardidxfile):
351 if line.startswith("lastModified="):
352 lastmod = line[len("lastModified="):].rstrip("\n")
355 traceback.print_exc()
# Write `lastmod` to the board idx file as a single "lastModified=<value>"
# line, replacing any previous content (the file is opened with mode "w").
# NOTE(review): lines are missing from this excerpt -- what happens when the
# directory does not exist (presumably it is created) and the closing of `f`
# are not visible; confirm.
358 def save_board_idx(self, lastmod):
362 boardidx_path = misc.get_board_idx_path(self.bbs_type)
363 basedir = os.path.dirname(boardidx_path)
364 if not os.path.isdir(basedir):
367 f = file(boardidx_path, "w")
368 f.write("lastModified=" + lastmod + "\n")