OSDN Git Service

Use progressbar of board window.
[fukui-no-namari/fukui-no-namari.git] / src / FukuiNoNamari / board_data.py
1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 import gobject
19 import gtk
20 import os.path
21 import glob
22 import codecs
23 import urllib2
24 import traceback
25 import itertools
26
27 import cachefile
28 import idxfile
29 import misc
30 import config
31 from http_sub import HTTPRedirectHandler302, HTTPDebugHandler
32
# Placeholder stored in the "lineCount" field of a thread entry that
# _merge_new_thread creates for a thread not yet present in the data list.
BOARD_DATA_INVALID_VALUE = 0
34
def accumulate(iterable, initial_value=0):
    """Yield the running total of *iterable*, starting from *initial_value*.

    One total is produced per input item; nothing is yielded for an empty
    input.
    """
    running_total = initial_value
    for addend in iterable:
        running_total += addend
        yield running_total
40
def follow(iterable, under_value=0):
    """Yield (previous, current) pairs for every item of *iterable*.

    The first pair uses *under_value* as its "previous" element.
    """
    previous = under_value
    for current in iterable:
        pair = (previous, current)
        previous = current
        yield pair
46
47 class BoardData:
48
49     def __init__(self, bbs_type):
50         self.bbs_type = bbs_type
51
52     def set_status(self, text):
53         pass
54
55     def set_fraction(self, fraction):
56         pass
57
58     def _merge_new_thread(self, datalist, id, title, res, num, lastmod):
59         average = 0
60         if lastmod != 0:
61             try:
62                 start = int(id)
63             except ValueError:
64                 pass
65             else:
66                 # avoid the Last-Modified time of subject.txt and
67                 # the build time of thread is equal (zero division)
68                 dur = lastmod - start
69                 if dur == 0:
70                     average = 999999
71                 else:
72                     average = round(res * 60 * 60 * 24.0 / dur, 2)
73
74         if id in datalist:
75             item = datalist[id]
76             if item["num"]:
77                 # already exists in datalist and num is not 0, then this thread
78                 # is duplicate in subject.txt.
79                 # ignore second.
80                 pass
81             else:
82                 item["num"] = num
83                 item["title"] = title
84                 item["res"] = res
85                 item["average"] = average
86         else:
87             datalist[id] = {"id": id, "num": num, "title": title,
88                             "res": res, "lineCount": BOARD_DATA_INVALID_VALUE,
89                             "lastModified": "", "average": average}
90
91     def merge_local_subjecttxt(self, datalist):
92         iterable = self._load_subjecttxt()
93
94         for id, title, res, num, lastmod in iterable:
95             self._merge_new_thread(datalist, id, title, res, num, lastmod)
96
97         status = "Complete subject file."
98         gobject.idle_add(self.set_status, status)
99
100     def merge_remote_subjecttxt(self, datalist):
101         iterable = self._get_subjecttxt()
102
103         for id, title, res, num, lastmod in iterable:
104             self._merge_new_thread(datalist, id, title, res, num, lastmod)
105
106     def _init_extra_data(self, dic):
107         dic["num"] = 0
108         dic["res"] = 0
109         dic["average"] = 0
110         return dic
111
112     def _progressing(self, iterable):
113         for before, fraction in follow(iterable):
114             if int(before*10) != int(fraction*10):
115                 gtk.threads_enter()
116                 try:
117                     self.set_fraction(fraction)
118                 finally:
119                     gtk.threads_leave()
120             yield fraction
121
122     def load_idxfiles(self):
123         print "load_cache"
124         datalist = self._load_cache()
125         print "load_idx"
126         self._load_modified_idxfiles(datalist)
127         print "save_cache"
128         cachefile.save_cache(self.bbs_type, datalist)
129
130         status = "Complete index files."
131         gobject.idle_add(self.set_status, status)
132         return datalist
133
134     def _load_cache(self):
135         try:
136             total = os.path.getsize(misc.get_board_cache_path(self.bbs_type))
137         except OSError:
138             total = -1
139
140         iterable = cachefile.load_cache(self.bbs_type)
141
142         # split
143         iterable_dic, iterable_line = itertools.tee(iterable)
144         iterable_dic = itertools.starmap(lambda x, y: x, iterable_dic)
145         iterable_line = itertools.starmap(lambda x, y: y, iterable_line)
146
147         iterable_line = itertools.imap(lambda x :len(x), iterable_line)
148         iterable_line = accumulate(iterable_line)
149         iterable_line = itertools.imap(
150             lambda value: float(value) / total / 2, iterable_line)
151         iterable_line = self._progressing(iterable_line)
152
153         # union
154         iterable = itertools.imap(lambda x, y: x, iterable_dic, iterable_line)
155
156         iterable = itertools.imap(self._init_extra_data, iterable)
157
158         return dict([(dic["id"], dic) for dic in iterable])
159
160     def _load_modified_idxfiles(self, datalist):
161         basedir = misc.get_thread_idx_dir_path(self.bbs_type)
162         ext = ".idx"
163         exist_key_set = set()
164         if os.path.isdir(basedir):
165             for idxfile_path in glob.glob(os.path.join(basedir, "*"+ext)):
166                 basename = os.path.basename(idxfile_path)
167                 thread_id = basename[:len(ext)*-1]
168                 try:
169                     idxlastModified = os.path.getmtime(idxfile_path)
170                 except OSError:
171                     continue
172                 exist_key_set.add(thread_id)
173                 if thread_id not in datalist:
174                     print "new", thread_id
175                     bbs_type_for_thread = self.bbs_type.clone_with_thread(
176                         thread_id)
177                     dic = idxfile.load_idx(bbs_type_for_thread)
178                     dic["id"] = thread_id
179                     dic["idxlastModified"] = idxlastModified
180                     dic = self._init_extra_data(dic)
181                     datalist[thread_id] = dic
182                 elif idxlastModified > datalist[thread_id]["idxlastModified"]:
183                     print "modified", thread_id
184                     bbs_type_for_thread = self.bbs_type.clone_with_thread(
185                         thread_id)
186                     datalist[thread_id]["idxlastModified"] = idxlastModified
187                     dic = idxfile.load_idx(bbs_type_for_thread)
188                     for key, value in dic.iteritems():
189                         datalist[thread_id][key] = value
190
191         # delete from datalist if idx file does not exist.
192         for key in datalist.keys():
193             if key not in exist_key_set:
194                 del datalist[key]
195                 print "del", key
196
197     def _split_record(self, line_encoded):
198         line = line_encoded.decode(self.bbs_type.encoding, "replace")
199         m = self.bbs_type.subject_reg.match(line)
200         if m:
201             id = m.group("id")
202             title = m.group("title")
203             try:
204                 res = int(m.group("res"))
205             except ValueError:
206                 res = 0
207             return id, title, res
208         return None
209
210     def _load_subjecttxt(self):
211         lastmod = self.load_board_idx()
212         try:
213             lastmod = misc.httpdate_to_secs(lastmod)
214         except ValueError:
215             lastmod = 0
216
217         subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
218         try:
219             total = os.path.getsize(subjecttxt_path)
220         except OSError:
221             total = -1
222
223         iterable = file(subjecttxt_path)
224
225         # split
226         iterable, iterable_len = itertools.tee(iterable)
227
228         iterable_len = itertools.imap(lambda l: len(l), iterable_len)
229         iterable_len = accumulate(iterable_len)
230         iterable_len = itertools.imap(
231             lambda value: float(value) / total / 2 + 0.5, iterable_len)
232         iterable_len = self._progressing(iterable_len)
233
234         # union
235         iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
236
237         for num, line_encoded in itertools.izip(itertools.count(1), iterable):
238             result = self._split_record(line_encoded)
239             if result:
240                 id, title, res = result
241                 yield id, title, res, num, lastmod
242
243     def _get_subjecttxt(self):
244
245         # get subject.txt
246
247         opener = urllib2.build_opener(HTTPRedirectHandler302, HTTPDebugHandler)
248         request = urllib2.Request(self.bbs_type.get_subject_txt_uri())
249         request.add_header("User-agent", config.User_Agent)
250         try:
251             response = opener.open(request)
252         except urllib2.HTTPError, e:
253             gobject.idle_add(self.set_status, "%d %s" % (e.code, e.msg))
254             print "switch to local"
255             self._load_subjecttxt(func)
256         except urllib2.URLError, e:
257             print e
258             gobject.idle_add(self.set_status, str(e))
259             print "switch to local"
260             self._load_subjecttxt(func)
261         else:
262             status = "%d %s" % (response.code, response.msg)
263             gobject.idle_add(self.set_status, status)
264             info = response.info()
265
266             lastmod = 0
267             if "Last-Modified" in info:
268                 _lastmod = info["Last-Modified"]
269                 self.save_board_idx(_lastmod)
270                 try:
271                     lastmod = misc.httpdate_to_secs(_lastmod)
272                 except ValueError:
273                     lastmod = 0
274
275             subjecttxt_path = misc.get_board_subjecttxt_path(self.bbs_type)
276             f = misc.FileWrap(subjecttxt_path, "w")
277
278             try:
279                 total = int(info["Content-Length"])
280             except:
281                 total = -1
282
283             def saving(line_encoded):
284                 try:
285                     f.write(line_encoded)
286                 except IOError:
287                     traceback.print_exc()
288                 return line_encoded
289
290             iterable = response
291
292             # split
293             iterable, iterable_len = itertools.tee(iterable)
294
295             iterable_len = itertools.imap(lambda l: len(l), iterable_len)
296             iterable_len = accumulate(iterable_len)
297             iterable_len = itertools.imap(
298                 lambda value: float(value) / total / 2 + 0.5, iterable_len)
299             iterable_len = self._progressing(iterable_len)
300
301             # union
302             iterable = itertools.imap(lambda x, y: x, iterable, iterable_len)
303
304             iterable = itertools.imap(saving, iterable)
305             iterable = itertools.izip(itertools.count(1), iterable)
306
307             for num, line_encoded in iterable:
308                 result = self._split_record(line_encoded)
309                 if result:
310                     id, title, res = result
311                     yield id, title, res, num, lastmod
312
313             f.close()
314
315     def load_board_idx(self):
316         lastmod = ""
317         boardidxfile = misc.get_board_idx_path(self.bbs_type)
318         try:
319             for line in file(boardidxfile):
320                 if line.startswith("lastModified="):
321                     lastmod = line[len("lastModified="):].rstrip("\n")
322                     break
323         except IOError:
324             traceback.print_exc()
325         return lastmod
326
327     def save_board_idx(self, lastmod):
328         if not lastmod:
329             return
330
331         boardidx_path = misc.get_board_idx_path(self.bbs_type)
332         basedir = os.path.dirname(boardidx_path)
333         if not os.path.isdir(basedir):
334             os.makedirs(basedir)
335
336         f = file(boardidx_path, "w")
337         f.write("lastModified=" + lastmod + "\n")
338         f.close()