OSDN Git Service

some tiny fixes.
[otptools/otptools.git] / markupper.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3  
4 import sys
5 import os
6 import re
7 import codecs
8 import pickle
9
10 import HTMLTagFilter
11 import deterfile
12
13 #sys.stdin = codecs.getreader('utf_8')(sys.stdin)
14 #sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
15
# Tag specifications handed to HTMLTagFilter below.
# NOTE(review): presumably "alist" is the allow list and "dlist" the deny
# list, combined in DENY_ALLOW mode -- confirm against HTMLTagFilter.
alist = [
    "a",
    "a:href",
    "a:name",
    "b",
    "br",
]
dlist = ["*"]

tag_filter = HTMLTagFilter.HTMLTagFilter(
    HTMLTagFilter.DENY_ALLOW,
    alist,
    dlist,
)

# Module-level path of the markup index file.
path_to_index = "./_markup_index"
21
22 class _InputStream(object):
23     """InputStream base class."""
24     def __init__(self):
25         pass
26
27     def __iter__(self):
28         """return Iterator"""
29         return self
30
31     def next(self):
32         """function for iterator"""
33         pass
34
35
36 class Markupper(object):
37     """
38     """
39     def __init__(self):
40         self._input_iter = None
41         self._index_past = {}
42         self._index = {}
43         self._image_dir = ""
44
45     def index_add(self, key, val):
46         """
47         Add key and value to index.
48
49         @param key:
50          @type key:
51
52         @param val:
53         @key val:
54         """
55         self._index[key] = val
56
57     def index(self, key):
58         """
59         Get index
60         """
61         return self._index[key]
62
63     def index_haskey(self, key):
64         return self._index.has_key(key)
65
66     def markup(self, input_iter, release="0"):
67         """
68         Do markup.
69
70         @param input_iter: iterator to use as input
71         @type input_iter: iterator
72         """
73         self.input_iter = input_iter
74         self._page_counter = 1
75         # alist = ["a", "a:href", "a:name", "b", "br" ]
76         # dlist = ["*"]
77         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
78         self._release = release
79
80         self.index_add("figs", [])
81
82         self._anchor = ""
83         for line in self.input_iter:
84             # line = self._default_markup_rule(line)
85             # head-of-line rules
86             if re.search(ur"^☆{{{$", line):
87                 self._inline(line)
88                 continue
89             elif re.search(ur"^☆image_dir:", line):
90                 self._image_dir = re.search(ur"^☆image_dir:\s*(.*)$", line).group(1)
91                 continue
92             elif re.search(ur"^☆comment\s{{{$", line):
93                 self._comment(line)
94                 continue
95             elif re.search(ur"^☆\*", line):
96                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
97                 continue
98             elif re.search(ur"^☆clear\s+", line):
99                 self._clear(line)
100                 continue
101             elif re.search(ur"^・", line):
102                 self._ulist(line)
103                 continue
104             elif re.search(ur"^[0-9]\.", line):
105                 self._olist(line)
106                 continue
107             elif re.search(ur"^☆begin-column:", line):
108                 self._begin_column(line)
109                 continue
110             elif re.search(ur"^☆end-column", line):
111                 self._end_column(line)
112                 continue
113             elif re.search(ur"^☆space", line):
114                 self._space(line)
115                 continue
116             elif re.search(ur"^☆call_tables", line):
117                 self._call_tables(line)
118                 continue
119             elif re.search(ur"^●", line):
120                 self._head_l(line)
121                 continue
122             elif re.search(ur"^○", line):
123                 self._head_m(line)
124                 continue
125             elif re.search(ur"^☆----", line):
126                 self._newpage(line)
127                 continue
128             elif re.search(ur"^☆\+---", line):
129                 self._code(line)
130                 continue
131             elif re.search(ur"^☆表", line):
132                 self._table(line)
133                 continue
134             elif re.search(ur"^☆図", line):
135                 self._fig(line)
136                 continue
137             elif re.search(ur"^☆リスト", line):
138                 self._list(line)
139                 continue
140             elif re.search(ur"^☆flow", line):
141                 self._flow(line)
142                 continue
143
144             if re.search(ur"^ ", line):
145                 self._paragraph(line)
146                 continue
147
148
149             if re.search(r"^\s*$", line):
150                 line = ""
151
152             line = line.strip()
153             print line
154
155         # end-of-loop
156
157     def _clear(self, line):
158         print """<div style="clear:left;"> </div>
159 """
160
161     def _head_l(self, line):
162         line = line.rstrip()
163         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
164             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
165             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
166
167         line = self._default_markup_rule(line)
168         if self._anchor != "":
169             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
170             self._anchor = ""
171         else:
172             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
173         print line
174
175     def _head_m(self, line):
176         line = line.rstrip()
177         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
178             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
179             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
180
181         line = self._default_markup_rule(line)
182         if self._anchor != "":
183             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
184             self._anchor = ""
185         else:
186             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
187         print line
188
189     def _paragraph(self, line):
190         line = self._default_markup_rule(line)
191         line = "<p>" + line + "</p>"
192         print line
193
194     def _newpage(self, line):
195         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
196         print line
197
198     def load_index(self, path_to_index):
199         """
200         load index database.
201
202         @param path_to_index: index db's path
203         @type path_to_index: string
204         """
205         # load index
206         try:
207             index_file = open(path_to_index, "r")
208             self._index_past = pickle.load(index_file)
209             index_file.close()
210         except IOError:
211             sys.stderr.write("warn: cannot read index file,\n")
212
213     def save_index(self, path_to_index):
214         """
215         save index database.
216
217         @param path_to_index: index db's path
218         @type path_to_index: string
219         """
220         # save index
221         try:
222             index_file = open(path_to_index, "w")
223             pickle.dump(self._index, index_file)
224             index_file.close()
225         except IOError:
226             sys.stderr.write("warn: cannot write index file,\n")
227
228     def make_hashlist(self, path_to_hashfile):
229         """
230         create hash list.
231
232         @param path_to_hashfile: hashfile's path
233         @type path_to_hashfile: string
234         """
235         try:
236             file_img_hash = open(path_to_hashfile, "r")
237         except IOError:
238             sys.stderr.write("cannot open file: %s" % path_img_hash)
239             return None;
240
241         self.hashlist = {};
242         for line in file_img_hash:
243             splited = line.strip().split("\t", 2)
244             # hashlist's format: <hash> \t <filename>
245             self.hashlist[splited[1]] = splited[0]
246
247     def _call_tagles(self):
248         pass
249
250     def _escape(self, line):
251         line = re.sub(ur"&", ur"&amp", line)
252         line = re.sub(ur"<", ur"&lt;", line)
253         line = re.sub(ur">", ur"&gt;", line)
254         return line
255
256     def _default_markup_rule(self, line):
257         """
258         apply default markup rules.
259
260         @param line: string to apply markup
261         @type line: string
262         """
263         line = self._escape(line)
264
265         # apply filter
266         # line = tag_filter.apply(line)
267
268         line = re.sub(ur"[★*](表[0-9~、]+)", ur"<b>\1</b>", line)
269         line = re.sub(ur"[★*](図[0-9~、]+)", ur"<b>\1</b>", line)
270         line = re.sub(ur"[★*](リスト[0-9~、]+)", ur"<b>\1</b>", line)
271         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
272         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
273         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
274         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
275
276         # comment
277         if re.search(ur"^☆#", line):
278             line = ""
279
280         return line
281
282
283     def _ulist(self, line):
284         """Proccess ul"""
285         print "<ul>"
286         while re.search(ur"^・", line):
287             line = self._default_markup_rule(line)
288             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
289             line = self.input_iter.next()
290         print "</ul>\n"
291
292     def _olist(self, line):
293         """Proccess ul"""
294         print "<ol>"
295         while re.search(ur"^[0-9]+\.", line):
296             line = self._default_markup_rule(line)
297             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
298             line = self.input_iter.next()
299         print "</ol>\n"
300
301
302
303     def _begin_column(self, line):
304         """Proccess column"""
305         try:
306             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
307         except AttributeError:
308             str_title = ""
309
310         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
311     <tr><th>%s</th></tr>
312     <tr><td><span style="font-size: 85%%;">
313     """ % (str_title)
314         print html
315
316     def _end_column(self, line):
317         print """</span></td></tr>
318     </table>
319     """
320
321     def _list_start(self):
322         return "<pre>"
323
324     def _list_end(self):
325         return "</pre>"
326
327     def _list(self, line):
328         try:
329             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
330         except AttributeError:
331             str_title = ""
332         print "<p><b>%s</b></p>" % (str_title)
333         print self._list_start()
334
335         for line in self.input_iter:
336             line = line.strip("\n\r")
337             line = self._escape(line)
338             if re.search(ur"""^☆\+---""", line):
339                 break
340             print line
341         print self._list_end()
342
343     def _code(self, line):
344         print self._list_start()
345
346         for line in self.input_iter:
347             line = self._escape(line)
348             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
349             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
350
351             if re.search(ur"^☆\+---$", line):
352                 break
353             print line,
354         print self._list_end()
355
356     def _inline(self, line):
357         for line in self.input_iter:
358             #        line = line.strip()
359             if re.search(ur"^☆}}}", line):
360                 break
361             print line
362
363     def _comment(self, line):
364         for line in self.input_iter:
365             line = line.strip()
366             if re.search(ur"^☆}}}", line):
367                 break
368
369     def _space(self, line):
370         print "<br><br>"
371
372     def _flow(self, line):
373         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
374         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
375 """
376         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
377 %s
378 </div>"""
379         flow_footer = """</div>
380 """
381         flow_item = """<div>
382 %s
383 <p>%s</p>
384 </div>
385
386 """
387         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
388
389         rex_title = re.compile(ur"^☆flow\s+(.*)$")
390         if rex_title.search(line):
391             title = rex_title.search(line).group(1)
392         else:
393             title = ""
394
395         rex_file = re.compile(ur"^([^:]*):(.*)$")
396         outputs = []
397         for line in self.input_iter:
398             if re.search(r"^\s*$", line):
399                 break
400             match = rex_file.search(line)
401             if match:
402                 file = os.path.join(self._image_dir, match.group(1))
403                 cap = self._default_markup_rule(match.group(2))
404             else:
405                 continue
406             fig = self._anchored_fig(file, cap)
407             outputs.append(flow_item % (fig, cap))
408
409         print flow_header
410         print flow_title % (title,)
411         print arrow.join(outputs)
412         print flow_footer
413         
414
415     def _fig_start(self, cap="", styles=[], width=0, height=0):
416         params = dict(style="", tablewidth="")
417         if width != 0:
418             params["style"] = "width:%d;" % (width,)
419             params["tablewidth"] = 'width="%d"' % (width,)
420
421         if "lfloat" in styles:
422             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
423     <tr> <td valign="top" align="center">
424     """ % params
425         elif "left" in styles:
426             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
427     <tr> <td valign="top" align="center">
428     """ %params
429         else:
430             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
431     <tr> <td valign="top" align="center">
432     """ % params
433
434     def _fig_end(self, cap="", styles=[]):
435         return """</td> </tr>
436     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
437     %s
438     </span></td> </tr>
439     </table>
440     """ % (cap)
441
442     def _fig(self, line):
443         if self._release == 1:
444             self._fig_release(line)
445         else:
446             self._fig(line)
447
448     def _get_png_geom(self, filepath):
449         desc = deterfile.file(filepath)
450         try:
451             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
452         except IndexError:
453             err = ",".join(desc)
454             raise Exception("deterfile error: %s, file: %s" % (err,filepath))
455         if m:
456             w = m.group(1)
457             h = m.group(2)
458             return (int(w), int(h))
459         else:
460             return None
461
462     def _fig(self, line):
463         try:
464             str_title = re.search(ur"^☆(図.*)$", line).group(1)
465         except AttributeError:
466             str_title = ""
467         if str_title.find(u"図*") == 0:
468             str_title = str_title.replace(u"図*", "")
469         line = self.input_iter.next()
470         styles = []
471         if line.find("@") == 0:
472             styles = line.strip().replace("@", "").split(",")
473             line = self.input_iter.next()
474
475         imgname = ""
476         imgname_s = ""
477         hash = ""
478         hash_s = ""
479         match_o1 = re.search(ur"<([^,]*?)>", line)
480         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
481         if not match_o1 == None:
482             imgname = match_o1.group(1)
483             imgname = os.path.join(self._image_dir, imgname)
484             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
485         elif not match_o2 == None:
486             imgname = match_o2.group(1)
487             imgname = os.path.join(self._image_dir, imgname)
488             imgname_s = match_o2.group(2)
489
490         geom = self._get_png_geom(imgname_s)
491         if geom:
492             w = geom[0]
493             h = geom[1]
494             print self._fig_start("", styles, width=w, height=h)
495         else:
496             print self._fig_start("", styles)
497         print self._anchored_fig(imgname, str_title, imgname_s)
498         print self._fig_end(str_title, styles);
499
500         dic = self.index("figs")
501         dic.append(imgname)
502         if imgname_s != "":
503             dic.append(imgname_s)
504
505         
506     def _anchored_fig(self, file, alt, file_s=""):
507
508         if file_s == "":
509             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
510
511         if not os.path.isfile(file_s):
512             file_s = file
513
514         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
515
516         return """<a href="%s">
517   <img src="%s" alt="%s">
518 </a>
519 """ % (file, file_s, alt)
520         
521
522     def _fig_release(self, line):
523         try:
524             str_title = re.search(ur"^☆(図.*)$", line).group(1)
525         except AttributeError:
526             str_title = ""
527         print self._fig_start()
528
529         line = self.input_iter.next()
530         imgname = ""
531         imgname_s = ""
532         hash = ""
533         hash_s = ""
534         match_o1 = re.search(ur"<([^,]*?)>", line)
535         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
536         if not match_o1 == None:
537             imgname = match_o1.group(1)
538             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
539         elif not match_o2 == None:
540             imgname = match_o1.group(1)
541             imgname_s = match_o1.group(2)
542
543
544         hash = self.hashlist.get(imgname, "")
545         hash_s = self.hashlist.get(imgname_s, "")
546         if hash_s == "":
547             hash_s = hash
548
549         print """<a href="/blob.pl?id=%s">
550      <slash type="image" id="%s" title="%s">
551      </a>
552      """ % (hash, hash_s, str_title)
553         
554
555         dic = self.index("figs")
556         dic.append(imgname)
557         if imgname_s != "":
558             dic.append(imgname_s)
559
560         print self._fig_end(str_title);
561
562
563     def _table_start(self, cap):
564         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table">
565     <caption><b>%s</b></caption>
566     """ % cap
567
568     def _table_end(self, footnote=""):
569         return "</table>\n%s</div>\n" % (footnote,)
570
571     def _table(self, line):
572         str_title = ""
573         self._table_buf1 = ""
574
575         try:
576             str_title = re.search(ur"^☆(表.*)$", line).group(1)
577             fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
578         except AttributeError:
579             str_title = ""
580             fig_name = ""
581         if str_title.find(u"表*") == 0:
582             str_title = str_title.replace(u"表*", "")
583
584         print self._table_start(str_title)
585         self._table_buf1 =  self._table_start(str_title)
586
587         num_row = 0
588         table_contents = []
589         footnote = ""
590         for line in self.input_iter:
591             line = line.strip(" \n")
592             line = self._default_markup_rule(line)
593             if re.search(ur"^\s*$", line):
594                 break
595             if re.search(ur"^※", line):
596                 footnote = re.search(ur"^(※.*)$", line).group(1)
597                 break
598             line = self._default_markup_rule(line)
599             if re.search(ur"^〓", line):
600                 line = re.sub(ur"^〓", "", line)
601                 tag_mode = "th"
602             else:
603                 tag_mode = "td"
604             table_contents.append([])
605             num_col = 0
606             for item in line.split("\t"):
607                 if item == "":
608                     if num_col == 0:
609                         n = 1
610                         try:
611                             while table_contents[num_row-n][num_col]["item"] == "":
612                                 n += 1
613                             table_contents[num_row-n][num_col]["row"] += 1
614                         except IndexError:
615                             pass
616                     else:
617                         n = 1
618                         try:
619                             while table_contents[num_row][num_col-n]["item"] == "":
620                                 n += 1
621                             table_contents[num_row][num_col-n]["col"] += 1
622                         except IndexError:
623                             pass
624
625                 table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
626                 num_col = num_col + 1
627             num_row = num_row + 1
628
629         for row_item in table_contents:
630             line = "<tr>"
631             for item in row_item:
632                 if item["item"] == "":
633                     continue
634                 line = line + "<" + item["tag"]
635                 if not item["row"] == 1:
636                     line = line + (' rowspan="%s"' % item["row"])
637                 if not item["col"] == 1:
638                     line = line + (' colspan="%s"' % item["col"])
639                 line = line +  ">"
640                 line = line + item["item"]
641                 line = line + "</" + item["tag"] + ">"
642             line = line + "</tr>\n"
643             print line,
644             self._table_buf1 = self._table_buf1 + line
645
646             # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
647             # line = line.replace("\t", "</th><th>")
648             # print line
649             # else:
650             # line = "<tr><td>" + line + "</td></tr>"
651             # line = line.replace("\t", "</td><td>")
652             # print line
653
654         print self._table_end(footnote)
655         self._table_buf1 =  self._table_buf1 + self._table_end()
656         if self.index_haskey("tables"):
657             self.index("tables")[fig_name] = self._table_buf1
658         else:
659             self.index_add("tables", {fig_name:self._table_buf1})
660
661     def _call_tables(self, line):
662         try:
663             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
664         except AttributeError:
665             return
666         print self.index("tables")[fig_name]