OSDN Git Service

48cc21c9bc6f283533875712c7192adcaa875b7f
[fukui-no-namari/fukui-no-namari.git] / src / FukuiNoNamari / board_data.py
1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 import gobject
19 import gtk
20 import os.path
21 import glob
22 import codecs
23 import urllib2
24 import traceback
25 import itertools
26 from StringIO import StringIO
27
28 import cachefile
29 import idxfile
30 import misc
31 import config
32 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
33 import network_manager
34
# Placeholder value; BoardData._merge_new_thread stores it as the "lineCount"
# of a thread entry that is created from a subject.txt record.
BOARD_DATA_INVALID_VALUE = 0
36
37
class NothingToDoException(Exception):
    """Raised by BoardData.get_subjecttxt when no request could be started.

    Deriving from Exception makes this a proper member of the exception
    hierarchy (an old-style class is not), while ``except
    NothingToDoException`` handlers keep working unchanged.
    """
    pass
39
40
def accumulate(iterable, initial_value=0):
    """Yield the running total of ``iterable``, starting from ``initial_value``.

    One value is yielded per input item; the starting value itself is never
    yielded, so an empty input produces nothing.
    """
    running_total = initial_value
    for increment in iterable:
        running_total += increment
        yield running_total
46
def follow(iterable, under_value=0):
    """Yield ``(previous, current)`` pairs for each item of ``iterable``.

    For the first item, ``previous`` is ``under_value``.
    """
    previous = under_value
    for current in iterable:
        pair = (previous, current)
        yield pair
        previous = current
52
53 class BoardData:
54
55     def __init__(self, bbs_type):
56         self.bbs_type = bbs_type
57         self.lastmod = ""
58
59     def set_status(self, text):
60         pass
61
62     def set_fraction(self, fraction):
63         pass
64
65     def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
66         average = 0
67         if lastmod != 0:
68             try:
69                 start = int(id)
70             except ValueError:
71                 pass
72             else:
73                 # avoid the Last-Modified time of subject.txt and
74                 # the build time of thread is equal (zero division)
75                 dur = lastmod - start
76                 if dur == 0:
77                     average = 999999
78                 else:
79                     average = round(res * 60 * 60 * 24.0 / dur, 2)
80
81         if id in datalist:
82             item = datalist[id]
83             if item["num"]:
84                 # already exists in datalist and num is not 0, then this thread
85                 # is duplicate in subject.txt.
86                 # ignore second.
87                 pass
88             else:
89                 item["num"] = num
90                 item["title"] = title
91                 item["res"] = res
92                 item["average"] = average
93         else:
94             datalist[id] = {"id": id, "num": num, "title": title,
95                             "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
96                             "lastModified": 0, "average": average, "oldRes": 0}
97
98     def merge_local_subjecttxt(self, datalist):
99         for id, title, res, num, lastmod in self._load_subjecttxt():
100             self._merge_new_thread(datalist, id, title, res, num, lastmod)
101             yield
102         status = "Complete subject file."
103         lastmod = self.load_board_idx()
104         if lastmod:
105             self.lastmod = lastmod
106             status = "%s [%s]" % (status, lastmod)
107         self.set_status(status)
108
109     def merge_remote_subjecttxt(self, datalist, iterable):
110         for id, title, res, num, lastmod in iterable:
111             yield self._merge_new_thread(datalist, id, title, res, num, lastmod)
112
113     def _init_extra_data(self, dic):
114         dic["num"] = 0
115         dic["res"] = 0
116         dic["average"] = 0
117         dic["oldRes"] = 0
118         return dic
119
120     def _progressing(self, iterable):
121         for before, fraction in follow(iterable):
122             if int(before*10) != int(fraction*10):
123                 self.set_fraction(fraction)
124             yield fraction
125
126     def _modify_dict(self, item_dict):
127         # lastModified, httpdate to second
128         httpdate = item_dict["lastModified"]
129         try:
130             secs = misc.httpdate_to_secs(httpdate)
131         except ValueError:
132             item_dict["lastModified"] = 0
133         else:
134             item_dict["lastModified"] = secs
135         return item_dict
136
137     def load_idxfiles(self, datalist):
138         try:
139             for i in self._load_cache(datalist):
140                 yield
141         except IOError:
142             # the ".cache" file does not exist.
143             pass
144         else:
145             self.set_status("Complete load cache.")
146
147         for i in self._load_modified_idxfiles(datalist):
148             yield
149
150         self.set_status("Complete load idx files.")
151
152         self._save_cache(datalist)
153         # do not wait to save
154
155         # adjustment after cache save, before load subject.txt
156         iterable = datalist.itervalues()
157         iterable = itertools.imap(self._modify_dict, iterable)
158         for i in iterable:
159             yield
160
161     def _load_cache(self, datalist):
162         try:
163             total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
164         except OSError:
165             total = -1
166
167         iterable = file(misc.get_board_cache_path(self.bbs_type))
168
169         # split
170         iterable_dic, iterable_line = itertools.tee(iterable)
171
172         iterable_dic = itertools.imap(lambda l: l.rstrip(), iterable_dic)
173         iterable_dic = cachefile.formatted_to_dict(iterable_dic)
174
175         iterable_line = itertools.imap(lambda x :len(x), iterable_line)
176         iterable_line = accumulate(iterable_line)
177         iterable_line = itertools.imap(
178             lambda value: float(value) / total / 5 * 2, iterable_line)
179         iterable_line = self._progressing(iterable_line)
180
181         # union
182         iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
183
184         iterable = itertools.imap(self._init_extra_data, iterable)
185
186         for dic in iterable:
187             datalist[dic["id"]] = dic
188             yield
189
    def _load_modified_idxfiles(self, datalist):
        """Bring ``datalist`` in line with the "*.idx" files on disk.

        Generator yielding None once per processed idx file and once per
        deleted entry.  For every idx file found in the board's idx
        directory: a thread id missing from ``datalist`` is loaded as a new
        entry; an entry whose stored "idxlastModified" is older than the
        file's mtime is refreshed from the file.  Afterwards every
        ``datalist`` key for which no idx file was processed is deleted.
        Progress is reported through ``_progressing`` as
        count / total / 10 + 0.4.
        """
        ext = ".idx"

        def id_and_lastmod(file_path):
            """Return (thread id, mtime as int), or None if mtime fails."""
            # the thread id is the file name with the extension cut off
            thread_id = os.path.basename(file_path)[:len(ext)*-1]
            try:
                idxlastModified = int(os.path.getmtime(file_path))
                return thread_id, idxlastModified
            except OSError:
                # falls through and returns None, which ifilter drops below
                pass

        def _do_new_thread(thread_id, idxlastModified):
            """Load the idx file of an unknown thread into ``datalist``."""
            print "new", thread_id

            dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
            dic["id"] = thread_id
            dic["idxlastModified"] = idxlastModified
            dic = self._init_extra_data(dic)
            datalist[thread_id] = dic
            return thread_id, idxlastModified

        def _do_modified_thread(thread_id, idxlastModified):
            """Overwrite a known entry with the values from its idx file."""
            print "modified", thread_id

            datalist[thread_id]["idxlastModified"] = idxlastModified
            dic = idxfile.load_idx(self.bbs_type.clone_with_thread(thread_id))
            for key, value in dic.iteritems():
                datalist[thread_id][key] = value
            return thread_id, idxlastModified

        def new_or_modified_thread(thread_id, idxlastModified):
            """Dispatch to the new/modified handler; pass the pair through."""
            if thread_id not in datalist:
                return _do_new_thread(thread_id, idxlastModified)
            elif idxlastModified > datalist[thread_id]["idxlastModified"]:
                return _do_modified_thread(thread_id, idxlastModified)
            return thread_id, idxlastModified

        basedir = misc.get_thread_idx_dir_path(self.bbs_type)

        filelist = glob.glob(os.path.join(basedir, "*"+ext))
        total = len(filelist)

        iterable = filelist

        # split: one branch carries the paths, the other counts for progress
        iterable, iterable_count = itertools.tee(iterable)

        iterable_count = itertools.izip(itertools.count(1), iterable_count)
        iterable_count = itertools.starmap(lambda x, y: x, iterable_count)
        iterable_count = itertools.imap(
            lambda x: float(x)/total/10 + 0.4, iterable_count)
        iterable_count = self._progressing(iterable_count)

        # union: advance both branches together, keep only the path
        iterable = itertools.imap(lambda x, y: x, iterable, iterable_count)

        iterable = itertools.imap(id_and_lastmod, iterable)
        # drop the None results of files whose mtime could not be read
        iterable = itertools.ifilter(None, iterable)
        iterable = itertools.starmap(new_or_modified_thread, iterable)

        # collect the ids of all threads that were processed
        exist_key_set = set()
        iterable = itertools.starmap(lambda x, y: exist_key_set.add(x),
            iterable)

        # the pipeline is lazy; this loop is what actually runs it
        for i in iterable:
            yield

        # delete from datalist if idx file does not exist.
        datalist_key_set = frozenset(datalist.iterkeys())
        delete_key_set = datalist_key_set - exist_key_set
        for key in delete_key_set:
            del datalist[key]
            print "del", key
            yield
264
265     def _save_cache(self, datalist):
266         iterable = datalist.items()
267         iterable = cachefile.dict_to_formatted(iterable)
268         c_file = misc.FileWrap(misc.get_board_cache_path(self.bbs_type), "w")
269         misc.chain(c_file.write, c_file.close, iterable)
270
271     def _split_record(self, line_encoded):
272         line = line_encoded.decode(self.bbs_type.encoding, "replace")
273         m = self.bbs_type.subject_reg.match(line)
274         if m:
275             id = m.group("id")
276             title = m.group("title")
277             try:
278                 res = int(m.group("res"))
279             except ValueError:
280                 res = 0
281             return id, title, res
282         return None
283
284     def _load_subjecttxt(self):
285         lastmod = self.load_board_idx()
286         try:
287             lastmod = misc.httpdate_to_secs(lastmod)
288         except ValueError:
289             lastmod = 0
290
291         subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
292         try:
293             total = os.path.getsize(subjecttxt_path)
294         except OSError:
295             total = -1
296
297         iterable = file(subjecttxt_path)
298
299         # split
300         iterable, iterable_len = itertools.tee(iterable)
301
302         iterable_len = itertools.imap(lambda l: len(l), iterable_len)
303         iterable_len = accumulate(iterable_len)
304         iterable_len = itertools.imap(
305             lambda value: float(value) / total / 2 + 0.5, iterable_len)
306         iterable_len = self._progressing(iterable_len)
307
308         # union
309         iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
310
311         iterable = itertools.izip(itertools.count(1), iterable)
312
313         def main_process():
314             for num, line_encoded in iterable:
315                 result = self._split_record(line_encoded)
316                 if result:
317                     id, title, res = result
318                     yield id, title, res, num, lastmod
319
320         return main_process()
321
322     def get_subjecttxt(self, on_received):
323         uri = self.bbs_type.get_subject_txt_uri()
324         request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
325         request.add_header("User-agent", config.User_Agent)
326         if self.lastmod:
327             request.add_header("If-modified-since", self.lastmod)
328
329         try:
330             network_manager.request_get(uri, request.headers, on_received)
331         except network_manager.BusyException:
332             self.set_status("The network is busy. Try later.")
333             raise NothingToDoException()
334         else:
335             self.set_status("GET...")
336
337     def progress_response(self, response):
338         status = response.status
339         headers = response.headers
340         message = StringIO(response.message)
341
342         if "last-modified".capitalize() in headers:
343             self.set_status("%s [%s]" % (status,
344                 headers["last-modified".capitalize()]))
345         else:
346             self.set_status("%s" % status)
347
348         version, code, msg = status.split(None, 2)
349         code = int(code)
350         if code != 200:
351             raise misc.StopChainException()
352
353         lastmod = 0
354         if "last-modified".capitalize() in headers:
355             _lastmod = headers["last-modified".capitalize()]
356             self.lastmod = _lastmod
357             self.save_board_idx(_lastmod)
358             try:
359                 lastmod = misc.httpdate_to_secs(_lastmod)
360             except ValueError:
361                 lastmod = 0
362
363         subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
364         f = misc.FileWrap(subjecttxt_path, "w")
365
366         try:
367             total = int(headers["content-length".capitalize()])
368         except:
369             total = -1
370
371         def saving(line_encoded):
372             try:
373                 f.write(line_encoded)
374             except IOError:
375                 traceback.print_exc()
376             return line_encoded
377
378         iterable = message
379
380         # split
381         iterable, iterable_len = itertools.tee(iterable)
382
383         iterable_len = itertools.imap(lambda l: len(l), iterable_len)
384         iterable_len = accumulate(iterable_len)
385         iterable_len = itertools.imap(
386             lambda value: float(value) / total, iterable_len)
387         iterable_len = self._progressing(iterable_len)
388
389         # union
390         iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
391
392         iterable = itertools.imap(saving, iterable)
393         iterable = itertools.izip(itertools.count(1), iterable)
394
395         for num, line_encoded in iterable:
396             result = self._split_record(line_encoded)
397             if result:
398                 id, title, res = result
399                 yield id, title, res, num, lastmod
400         f.close()
401
402     def load_board_idx(self):
403         lastmod = ""
404         boardidxfile = misc.get_board_idx_path(self.bbs_type)
405         try:
406             for line in file(boardidxfile):
407                 if line.startswith("lastModified="):
408                     lastmod = line[len("lastModified="):].rstrip("\n")
409                     break
410         except IOError:
411             traceback.print_exc()
412         return lastmod
413
414     def save_board_idx(self, lastmod):
415         if not lastmod:
416             return
417
418         boardidx_path = misc.get_board_idx_path(self.bbs_type)
419         basedir = os.path.dirname(boardidx_path)
420         if not os.path.isdir(basedir):
421             os.makedirs(basedir)
422
423         f = file(boardidx_path, "w")
424         f.write("lastModified=" + lastmod + "\n")
425         f.close()