OSDN Git Service

Selected text colors follow the focus in ThreadView.
[fukui-no-namari/fukui-no-namari.git] / src / FukuiNoNamari / board_data.py
1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 import gobject
19 import gtk
20 import os.path
21 import glob
22 import codecs
23 import urllib2
24 import traceback
25 import itertools
26
27 import cachefile
28 import idxfile
29 import misc
30 import config
31 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
32
33 BOARD_DATA_INVALID_VALUE = 0
34
def accumulate(iterable, initial_value=0):
    """Yield the running total of iterable, one value per input item.

    The total starts at initial_value; that starting value itself is never
    yielded, so an empty iterable produces nothing.
    """
    running_total = initial_value
    for addend in iterable:
        running_total += addend
        yield running_total
40
def follow(iterable, under_value=0):
    """Yield (previous, current) pairs for every item of iterable.

    For the first item there is no predecessor, so under_value is used
    in its place.
    """
    previous = under_value
    for current in iterable:
        pair = (previous, current)
        previous = current
        yield pair
46
47 class BoardData:
48
49     def __init__(self, bbs_type):
50         self.bbs_type = bbs_type
51
52     def set_status(self, text):
53         pass
54
55     def set_fraction(self, fraction):
56         pass
57
58     def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
59         average = 0
60         if lastmod != 0:
61             try:
62                 start = int(id)
63             except ValueError:
64                 pass
65             else:
66                 # avoid the Last-Modified time of subject.txt and
67                 # the build time of thread is equal (zero division)
68                 dur = lastmod - start
69                 if dur == 0:
70                     average = 999999
71                 else:
72                     average = round(res * 60 * 60 * 24.0 / dur, 2)
73
74         if id in datalist:
75             item = datalist[id]
76             if item["num"]:
77                 # already exists in datalist and num is not 0, then this thread
78                 # is duplicate in subject.txt.
79                 # ignore second.
80                 pass
81             else:
82                 item["num"] = num
83                 item["title"] = title
84                 item["res"] = res
85                 item["average"] = average
86         else:
87             datalist[id] = {"id": id, "num": num, "title": title,
88                             "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
89                             "lastModified": 0, "average": average, "oldRes": 0}
90
91     def merge_local_subjecttxt(self, datalist):
92         try:
93             for id, title, res, num, lastmod in self._load_subjecttxt():
94                 self._merge_new_thread(datalist, id, title, res, num, lastmod)
95         except IOError:
96             pass
97         except:
98             tracebakc.print_exc()
99         else:
100             status = "Complete subject file."
101             gobject.idle_add(self.set_status, status)
102
103     def merge_remote_subjecttxt(self, datalist):
104         try:
105             for id, title, res, num, lastmod in self._get_subjecttxt():
106                 self._merge_new_thread(datalist, id, title, res, num, lastmod)
107         except IOError:
108             pass
109         except:
110             traceback.print_exc()
111
112     def _init_extra_data(self, dic):
113         dic["num"] = 0
114         dic["res"] = 0
115         dic["average"] = 0
116         dic["oldRes"] = 0
117         return dic
118
119     def _progressing(self, iterable):
120         for before, fraction in follow(iterable):
121             if int(before*10) != int(fraction*10):
122                 gtk.gdk.threads_enter()
123                 try:
124                     self.set_fraction(fraction)
125                 finally:
126                     gtk.gdk.threads_leave()
127             yield fraction
128
129     def _modify_dict(self, item_dict):
130         # lastModified, httpdate to second
131         httpdate = item_dict["lastModified"]
132         try:
133             secs = misc.httpdate_to_secs(httpdate)
134         except ValueError:
135             item_dict["lastModified"] = 0
136         else:
137             item_dict["lastModified"] = secs
138         return item_dict
139
140     def load_idxfiles(self):
141         print "load_cache"
142         try:
143             datalist = self._load_cache()
144         except IOError:
145             datalist = {}
146         print "load_idx"
147         self._load_modified_idxfiles(datalist)
148         print "save_cache"
149         try:
150             self._save_cache(datalist)
151         except IOError:
152             traceback.print_exc()
153
154         # adjustment after cache save, before load subject.txt
155         iterable = datalist.itervalues()
156         iterable = itertools.imap(self._modify_dict, iterable)
157         for i in iterable: -1
158
159         status = "Complete index files."
160         gobject.idle_add(self.set_status, status)
161         return datalist
162
163     def _load_cache(self):
164         try:
165             total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
166         except OSError:
167             total = -1
168
169         iterable = file(misc.get_board_cache_path(self.bbs_type))
170
171         # split
172         iterable_dic, iterable_line = itertools.tee(iterable)
173
174         iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
175         iterable_dic = cachefile.formatted_to_dict(iterable_dic)
176
177         iterable_line = itertools.imap(lambda x :len(x), iterable_line)
178         iterable_line = accumulate(iterable_line)
179         iterable_line = itertools.imap(
180             lambda value: float(value) / total / 5 * 2, iterable_line)
181         iterable_line = self._progressing(iterable_line)
182
183         # union
184         iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
185
186         iterable = itertools.imap(self._init_extra_data, iterable)
187
188         return dict([(dic["id"], dic) for dic in iterable])
189
190     def _load_modified_idxfiles(self, datalist):
191         ext = ".idx"
192
193         def id_and_lastmod(file_path):
194             thread_id = os.path.basename(file_path)[:len(ext)*-1]
195             try:
196                 idxlastModified = int(os.path.getmtime(file_path))
197                 return thread_id, idxlastModified
198             except OSError:
199                 pass
200
201         def _do_new_thread(thread_id, idxlastModified):
202             print "new", thread_id
203
204             dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
205             dic["id"] = thread_id
206             dic["idxlastModified"] = idxlastModified
207             dic = self._init_extra_data(dic)
208             datalist[thread_id] = dic
209             return thread_id, idxlastModified
210
211         def _do_modified_thread(thread_id, idxlastModified):
212             print "modified", thread_id
213
214             datalist[thread_id]["idxlastModified"] = idxlastModified
215             dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
216             for key, value in dic.iteritems():
217                 datalist[thread_id][key] = value
218             return thread_id, idxlastModified
219
220         def new_or_modified_thread(thread_id, idxlastModified):
221             if thread_id not in datalist:
222                 return _do_new_thread(thread_id, idxlastModified)
223             elif idxlastModified > datalist[thread_id]["idxlastModified"]:
224                 return _do_modified_thread(thread_id, idxlastModified)
225             return thread_id, idxlastModified
226
227         basedir = misc.get_thread_idx_dir_path(self.bbs_type)
228
229         filelist = glob.glob(os.path.join(basedir, "*"+ext))
230         total = len(filelist)
231
232         iterable = filelist
233
234         # split
235         iterable, iterable_count = itertools.tee(iterable)
236
237         iterable_count = itertools.izip(itertools.count(1), iterable_count)
238         iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
239         iterable_count = itertools.imap(
240             lambda x: float(x)/total/10 + 0.4, iterable_count)
241         iterable_count = self._progressing(iterable_count)
242
243         # union
244         iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)
245
246         iterable = itertools.imap(id_and_lastmod, iterable)
247         iterable = itertools.ifilter(None, iterable)
248         iterable = itertools.starmap(new_or_modified_thread, iterable)
249         exist_key_set = frozenset([x for x, y in iterable])
250
251         # delete from datalist if idx file does not exist.
252         datalist_key_set = frozenset(datalist.iterkeys())
253         delete_key_set = datalist_key_set - exist_key_set
254         for key in delete_key_set:
255             del datalist[key]
256             print "del", key
257
258     def _save_cache(self, datalist):
259         iterable = datalist.iteritems()
260         iterable = cachefile.dict_to_formatted(iterable)
261         c_file = misc.FileWrap(misc.get_board_cache_path(self.bbs_type), "w")
262         c_file.writelines(iterable)
263
264     def _split_record(self, line_encoded):
265         line = line_encoded.decode(self.bbs_type.encoding, "replace")
266         m = self.bbs_type.subject_reg.match(line)
267         if m:
268             id = m.group("id")
269             title = m.group("title")
270             try:
271                 res = int(m.group("res"))
272             except ValueError:
273                 res = 0
274             return id, title, res
275         return None
276
277     def _load_subjecttxt(self):
278         lastmod = self.load_board_idx()
279         try:
280             lastmod = misc.httpdate_to_secs(lastmod)
281         except ValueError:
282             lastmod = 0
283
284         subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
285         try:
286             total = os.path.getsize(subjecttxt_path)
287         except OSError:
288             total = -1
289
290         iterable = file(subjecttxt_path)
291
292         # split
293         iterable, iterable_len = itertools.tee(iterable)
294
295         iterable_len = itertools.imap(lambda l: len(l), iterable_len)
296         iterable_len = accumulate(iterable_len)
297         iterable_len = itertools.imap(
298             lambda value: float(value) / total / 2 + 0.5, iterable_len)
299         iterable_len = self._progressing(iterable_len)
300
301         # union
302         iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
303
304         iterable = itertools.izip(itertools.count(1), iterable)
305
306         def main_process():
307             for num, line_encoded in iterable:
308                 result = self._split_record(line_encoded)
309                 if result:
310                     id, title, res = result
311                     yield id, title, res, num, lastmod
312
313         return main_process()
314
315     def _get_subjecttxt(self):
316
317         # get subject.txt
318
319         opener = urllib2.build_opener(HTTPRedirectHandler302, HTTPDebugHandler)
320         request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
321         request.add_header("User-agent", config.User_Agent)
322         try:
323             response = opener.open(request)
324         except urllib2.HTTPError, e:
325             gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
326             print "switch to local"
327             return self._load_subjecttxt()
328         except urllib2.URLError, e:
329             print e
330             gobject.idle_add(self.set_status, str(e))
331             print "switch to local"
332             return self._load_subjecttxt()
333         else:
334             status = "%d %s" % (response.code, response.msg)
335             gobject.idle_add(self.set_status, status)
336             info = response.info()
337
338             lastmod = 0
339             if "Last-Modified" in info:
340                 _lastmod = info["Last-Modified"]
341                 self.save_board_idx(_lastmod)
342                 try:
343                     lastmod = misc.httpdate_to_secs(_lastmod)
344                 except ValueError:
345                     lastmod = 0
346
347             subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
348             f = misc.FileWrap(subjecttxt_path, "w")
349
350             try:
351                 total = int(info["Content-Length"])
352             except:
353                 total = -1
354
355             def saving(line_encoded):
356                 try:
357                     f.write(line_encoded)
358                 except IOError:
359                     traceback.print_exc()
360                 return line_encoded
361
362             iterable = response
363
364             # split
365             iterable, iterable_len = itertools.tee(iterable)
366
367             iterable_len = itertools.imap(lambda l: len(l), iterable_len)
368             iterable_len = accumulate(iterable_len)
369             iterable_len = itertools.imap(
370                 lambda value: float(value) / total, iterable_len)
371             iterable_len = self._progressing(iterable_len)
372
373             # union
374             iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
375
376             iterable = itertools.imap(saving, iterable)
377             iterable = itertools.izip(itertools.count(1), iterable)
378
379             def main_process():
380                 for num, line_encoded in iterable:
381                     result = self._split_record(line_encoded)
382                     if result:
383                         id, title, res = result
384                         yield id, title, res, num, lastmod
385
386             return main_process()
387
388     def load_board_idx(self):
389         lastmod = ""
390         boardidxfile = misc.get_board_idx_path(self.bbs_type)
391         try:
392             for line in file(boardidxfile):
393                 if line.startswith("lastModified="):
394                     lastmod = line[len("lastModified="):].rstrip("\n")
395                     break
396         except IOError:
397             traceback.print_exc()
398         return lastmod
399
400     def save_board_idx(self, lastmod):
401         if not lastmod:
402             return
403
404         boardidx_path = misc.get_board_idx_path(self.bbs_type)
405         basedir = os.path.dirname(boardidx_path)
406         if not os.path.isdir(basedir):
407             os.makedirs(basedir)
408
409         f = file(boardidx_path, "w")
410         f.write("lastModified=" + lastmod + "\n")
411         f.close()