OSDN Git Service

d4ad4337fae398e0f69bdabeb79ddea94ef67073
[otptools/otptools.git] / markupper.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3  
4 import sys
5 import os
6 import re
7 import codecs
8 import pickle
9
10 import HTMLTagFilter
11 import deterfile
12
13 #sys.stdin = codecs.getreader('utf_8')(sys.stdin)
14 #sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
15
# Path of the index database file.
# NOTE(review): presumably handed to Markupper.load_index()/save_index() by
# the calling script -- confirm against the caller.
path_to_index = "./_markup_index"

# Tag / attribute names given to HTMLTagFilter as its two lists.
# NOTE(review): presumably DENY_ALLOW means "deny everything in dlist except
# what alist permits" -- confirm against the HTMLTagFilter module.
alist = [
    "a",
    "a:href",
    "a:name",
    "b",
    "br",
]
dlist = ["*"]
tag_filter = HTMLTagFilter.HTMLTagFilter(
    HTMLTagFilter.DENY_ALLOW,
    alist,
    dlist,
)
21
22 class _InputStream(object):
23     """InputStream base class."""
24     def __init__(self):
25         pass
26
27     def __iter__(self):
28         """return Iterator"""
29         return self
30
31     def next(self):
32         """function for iterator"""
33         pass
34
35
36 class Markupper(object):
37     """
38     """
39     def __init__(self):
40         self._input_iter = None
41         self._index_past = {}
42         self._index = {}
43         self._image_dir = ""
44
45     def index_add(self, key, val):
46         """
47         Add key and value to index.
48
49         @param key:
50          @type key:
51
52         @param val:
53         @key val:
54         """
55         self._index[key] = val
56
57     def index(self, key):
58         """
59         Get index
60         """
61         return self._index[key]
62
63     def index_haskey(self, key):
64         return self._index.has_key(key)
65
66     def markup(self, input_iter, release="0"):
67         """
68         Do markup.
69
70         @param input_iter: iterator to use as input
71         @type input_iter: iterator
72         """
73         self.input_iter = input_iter
74         self._page_counter = 1
75         self._image_border = 0
76         # alist = ["a", "a:href", "a:name", "b", "br" ]
77         # dlist = ["*"]
78         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
79         self._release = release
80
81         self.index_add("figs", [])
82
83         self._anchor = ""
84         for line in self.input_iter:
85             # line = self._default_markup_rule(line)
86             # head-of-line rules
87             if re.search(ur"^☆{{{$", line):
88                 self._inline(line)
89                 continue
90             elif re.search(ur"^☆image_dir:", line):
91                 self._image_dir = re.search(ur"^☆image_dir:\s*(.*)$", line).group(1)
92                 continue
93             elif re.search(ur"^☆image_border:\s(on|On|ON)", line):
94                 self._image_border = 1
95                 continue
96             elif re.search(ur"^☆comment\s{{{$", line):
97                 self._comment(line)
98                 continue
99             elif re.search(ur"^☆\*", line):
100                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
101                 continue
102             elif re.search(ur"^☆clear\s+", line):
103                 self._clear(line)
104                 continue
105             elif re.search(ur"^・", line):
106                 self._ulist(line)
107                 continue
108             elif re.search(ur"^[0-9]\.", line):
109                 self._olist(line)
110                 continue
111             elif re.search(ur"^☆begin-column:", line):
112                 self._begin_column(line)
113                 continue
114             elif re.search(ur"^☆end-column", line):
115                 self._end_column(line)
116                 continue
117             elif re.search(ur"^☆space", line):
118                 self._space(line)
119                 continue
120             elif re.search(ur"^☆call_tables", line):
121                 self._call_tables(line)
122                 continue
123             elif re.search(ur"^●", line):
124                 self._head_l(line)
125                 continue
126             elif re.search(ur"^○", line):
127                 self._head_m(line)
128                 continue
129             elif re.search(ur"^☆----", line):
130                 self._newpage(line)
131                 continue
132             elif re.search(ur"^☆\+---", line):
133                 self._code(line)
134                 continue
135             elif re.search(ur"^☆表", line):
136                 self._table(line)
137                 continue
138             elif re.search(ur"^☆図", line):
139                 self._fig(line)
140                 continue
141             elif re.search(ur"^☆写真", line):
142                 self._photo(line)
143                 continue
144             elif re.search(ur"^☆リスト", line):
145                 self._list(line)
146                 continue
147             elif re.search(ur"^☆flow", line):
148                 self._flow(line)
149                 continue
150
151             if re.search(ur"^ ", line):
152                 self._paragraph(line)
153                 continue
154
155
156             if re.search(r"^\s*$", line):
157                 line = ""
158
159             line = line.strip()
160             print line
161
162         # end-of-loop
163
164     def _clear(self, line):
165         print """<div style="clear:left;"> </div>
166 """
167
168     def _head_l(self, line):
169         line = line.rstrip()
170         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
171             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
172             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
173
174         line = self._default_markup_rule(line)
175         if self._anchor != "":
176             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
177             self._anchor = ""
178         else:
179             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
180         print line
181
182     def _head_m(self, line):
183         line = line.rstrip()
184         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
185             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
186             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
187
188         line = self._default_markup_rule(line)
189         if self._anchor != "":
190             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
191             self._anchor = ""
192         else:
193             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
194         print line
195
196     def _paragraph(self, line):
197         line = self._default_markup_rule(line)
198         line = "<p>" + line + "</p>"
199         print line
200
201     def _newpage(self, line):
202         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
203         print line
204
205     def load_index(self, path_to_index):
206         """
207         load index database.
208
209         @param path_to_index: index db's path
210         @type path_to_index: string
211         """
212         # load index
213         try:
214             index_file = open(path_to_index, "r")
215             self._index_past = pickle.load(index_file)
216             index_file.close()
217         except IOError:
218             sys.stderr.write("warn: cannot read index file,\n")
219
220     def save_index(self, path_to_index):
221         """
222         save index database.
223
224         @param path_to_index: index db's path
225         @type path_to_index: string
226         """
227         # save index
228         try:
229             index_file = open(path_to_index, "w")
230             pickle.dump(self._index, index_file)
231             index_file.close()
232         except IOError:
233             sys.stderr.write("warn: cannot write index file,\n")
234
235     def make_hashlist(self, path_to_hashfile):
236         """
237         create hash list.
238
239         @param path_to_hashfile: hashfile's path
240         @type path_to_hashfile: string
241         """
242         try:
243             file_img_hash = open(path_to_hashfile, "r")
244         except IOError:
245             sys.stderr.write("cannot open file: %s" % path_img_hash)
246             return None;
247
248         self.hashlist = {};
249         for line in file_img_hash:
250             splited = line.strip().split("\t", 2)
251             # hashlist's format: <hash> \t <filename>
252             self.hashlist[splited[1]] = splited[0]
253
254     def _call_tagles(self):
255         pass
256
257     def _escape(self, line):
258         line = re.sub(ur"&", ur"&amp;", line)
259         line = re.sub(ur"<", ur"&lt;", line)
260         line = re.sub(ur">", ur"&gt;", line)
261         return line
262
263     def _default_markup_rule(self, line):
264         """
265         apply default markup rules.
266
267         @param line: string to apply markup
268         @type line: string
269         """
270         line = self._escape(line)
271
272         # apply filter
273         # line = tag_filter.apply(line)
274
275         line = re.sub(ur"[★*](表[0-9~、]+)", ur"<b>\1</b>", line)
276         line = re.sub(ur"[★*](図[0-9~、]+)", ur"<b>\1</b>", line)
277         line = re.sub(ur"[★*](写真[0-9~、]+)", ur"<b>\1</b>", line)
278         line = re.sub(ur"[★*](リスト[0-9~、]+)", ur"<b>\1</b>", line)
279         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
280         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
281         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
282         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
283
284         # comment
285         if re.search(ur"^☆#", line):
286             line = ""
287
288         return line
289
290
291     def _ulist(self, line):
292         """Proccess ul"""
293         print "<ul>"
294         while re.search(ur"^・", line):
295             line = self._default_markup_rule(line)
296             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
297             line = self.input_iter.next()
298         print "</ul>\n"
299
300     def _olist(self, line):
301         """Proccess ul"""
302         print "<ol>"
303         while re.search(ur"^[0-9]+\.", line):
304             line = self._default_markup_rule(line)
305             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
306             line = self.input_iter.next()
307         print "</ol>\n"
308
309
310
311     def _begin_column(self, line):
312         """Proccess column"""
313         try:
314             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
315         except AttributeError:
316             str_title = ""
317
318         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
319     <tr><th>%s</th></tr>
320     <tr><td><span style="font-size: 85%%;">
321     """ % (str_title)
322         print html
323
324     def _end_column(self, line):
325         print """</span></td></tr>
326     </table>
327     """
328
329     def _list_start(self):
330         return "<pre>"
331
332     def _list_end(self):
333         return "</pre>"
334
335     def _list(self, line):
336         try:
337             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
338         except AttributeError:
339             str_title = ""
340         print "<p><b>%s</b></p>" % (str_title)
341         print self._list_start()
342
343         for line in self.input_iter:
344             line = line.strip("\n\r")
345             line = self._escape(line)
346             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
347             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
348             if re.search(ur"""^☆\+---""", line):
349                 break
350             print line
351         print self._list_end()
352
353     def _code(self, line):
354         print self._list_start()
355
356         for line in self.input_iter:
357             line = self._escape(line)
358             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
359             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
360
361             if re.search(ur"^☆\+---$", line):
362                 break
363             print line,
364         print self._list_end()
365
366     def _inline(self, line):
367         for line in self.input_iter:
368             #        line = line.strip()
369             if re.search(ur"^☆}}}", line):
370                 break
371             print line
372
373     def _comment(self, line):
374         for line in self.input_iter:
375             line = line.strip()
376             if re.search(ur"^☆}}}", line):
377                 break
378
379     def _space(self, line):
380         print "<br><br>"
381
382     def _flow(self, line):
383         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
384         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
385 """
386         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
387 %s
388 </div>"""
389         flow_footer = """</div>
390 """
391         flow_item = """<div>
392 %s
393 <p>%s</p>
394 </div>
395
396 """
397         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
398
399         rex_title = re.compile(ur"^☆flow\s+(.*)$")
400         if rex_title.search(line):
401             title = rex_title.search(line).group(1)
402         else:
403             title = ""
404
405         rex_file = re.compile(ur"^([^:]*):(.*)$")
406         outputs = []
407         for line in self.input_iter:
408             if re.search(r"^\s*$", line):
409                 break
410             match = rex_file.search(line)
411             if match:
412                 file = os.path.join(self._image_dir, match.group(1))
413                 cap = self._default_markup_rule(match.group(2))
414             else:
415                 continue
416             fig = self._anchored_fig(file, cap)
417             outputs.append(flow_item % (fig, cap))
418
419         print flow_header
420         print flow_title % (title,)
421         print arrow.join(outputs)
422         print flow_footer
423         
424
425     def _fig_start(self, cap="", styles=[], width=0, height=0):
426         params = dict(style="", tablewidth="")
427         if width != 0:
428             params["style"] = "width:%d;" % (width,)
429             params["tablewidth"] = 'width="%d"' % (width,)
430
431         if "lfloat" in styles:
432             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
433     <tr> <td valign="top" align="center">
434     """ % params
435         elif "left" in styles:
436             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
437     <tr> <td valign="top" align="center">
438     """ %params
439         else:
440             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
441     <tr> <td valign="top" align="center">
442     """ % params
443
444     def _fig_end(self, cap="", styles=[]):
445         return """</td> </tr>
446     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
447     %s
448     </span></td> </tr>
449     </table>
450     """ % (cap)
451
452     def _get_png_geom(self, filepath):
453         desc = deterfile.file(filepath)
454         try:
455             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
456         except IndexError:
457             err = ",".join(desc)
458             raise Exception("deterfile error: %s, file: %s" % (err,filepath))
459         if m:
460             w = m.group(1)
461             h = m.group(2)
462             return (int(w), int(h))
463         else:
464             return None
465
466     def _fig(self, line):
467         try:
468             str_title = re.search(ur"^☆(図.*)$", line).group(1)
469         except AttributeError:
470             str_title = ""
471         if str_title.find(u"図*") == 0:
472             str_title = str_title.replace(u"図*", "")
473         line = self.input_iter.next()
474         styles = []
475         if line.find("@") == 0:
476             styles = line.strip().replace("@", "").split(",")
477             line = self.input_iter.next()
478
479         imgname = ""
480         imgname_s = ""
481         hash = ""
482         hash_s = ""
483         match_o1 = re.search(ur"<([^,]*?)>", line)
484         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
485         if not match_o1 == None:
486             imgname = match_o1.group(1)
487             imgname = os.path.join(self._image_dir, imgname)
488             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
489         elif not match_o2 == None:
490             imgname = match_o2.group(1)
491             imgname = os.path.join(self._image_dir, imgname)
492             imgname_s = match_o2.group(2)
493             imgname_s = os.path.join(self._image_dir, imgname_s)
494
495         geom = self._get_png_geom(imgname_s)
496         if geom:
497             w = geom[0]
498             h = geom[1]
499             print self._fig_start("", styles, width=w, height=h)
500         else:
501             print self._fig_start("", styles)
502         print self._anchored_fig(imgname, str_title, imgname_s)
503         print self._fig_end(str_title, styles);
504
505         dic = self.index("figs")
506         dic.append(imgname)
507         if imgname_s != "":
508             dic.append(imgname_s)
509
510     def _photo(self, line):
511         try:
512             str_title = re.search(ur"^☆(写真.*)$", line).group(1)
513         except AttributeError:
514             str_title = ""
515         if str_title.find(u"写真*") == 0:
516             str_title = str_title.replace(u"写真*", "")
517         line = self.input_iter.next()
518         styles = []
519         if line.find("@") == 0:
520             styles = line.strip().replace("@", "").split(",")
521             line = self.input_iter.next()
522
523         imgname = ""
524         imgname_s = ""
525         hash = ""
526         hash_s = ""
527         match_o1 = re.search(ur"<([^,]*?)>", line)
528         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
529         if not match_o1 == None:
530             imgname = match_o1.group(1)
531             imgname = os.path.join(self._image_dir, imgname)
532             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
533         elif not match_o2 == None:
534             imgname = match_o2.group(1)
535             imgname = os.path.join(self._image_dir, imgname)
536             imgname_s = match_o2.group(2)
537             imgname_s = os.path.join(self._image_dir, imgname_s)
538
539         geom = self._get_png_geom(imgname_s)
540         if geom:
541             w = geom[0]
542             h = geom[1]
543             print self._fig_start("", styles, width=w, height=h)
544         else:
545             print self._fig_start("", styles)
546         print self._anchored_fig(imgname, str_title, imgname_s)
547         print self._fig_end(str_title, styles);
548
549         dic = self.index("figs")
550         dic.append(imgname)
551         if imgname_s != "":
552             dic.append(imgname_s)
553
554         
555     def _anchored_fig(self, file, alt, file_s=""):
556
557         if file_s == "":
558             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
559
560         if not os.path.isfile(file_s):
561             file_s = file
562
563         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
564
565         ret = """<a href="%s">
566   <img src="%s" alt="%s">
567 </a>
568 """ % (file, file_s, alt)
569
570
571         return ret
572         
573
574     def _fig_release(self, line):
575         try:
576             str_title = re.search(ur"^☆(図.*)$", line).group(1)
577         except AttributeError:
578             str_title = ""
579         print self._fig_start()
580
581         line = self.input_iter.next()
582         imgname = ""
583         imgname_s = ""
584         hash = ""
585         hash_s = ""
586         match_o1 = re.search(ur"<([^,]*?)>", line)
587         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
588         if not match_o1 == None:
589             imgname = match_o1.group(1)
590             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
591         elif not match_o2 == None:
592             imgname = match_o1.group(1)
593             imgname_s = match_o1.group(2)
594
595
596         hash = self.hashlist.get(imgname, "")
597         hash_s = self.hashlist.get(imgname_s, "")
598         if hash_s == "":
599             hash_s = hash
600
601         print """<a href="/blob.pl?id=%s">
602      <slash type="image" id="%s" title="%s">
603      </a>
604      """ % (hash, hash_s, str_title)
605         
606
607         dic = self.index("figs")
608         dic.append(imgname)
609         if imgname_s != "":
610             dic.append(imgname_s)
611
612         print self._fig_end(str_title);
613
614
615     def _table_start(self, cap):
616         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table" width="100%%">
617     <caption><b>%s</b></caption>
618     """ % cap
619
620     def _table_end(self, footnote=""):
621         return "</table>\n%s</div>\n" % (footnote,)
622
    def _table(self, line):
        """
        Print an HTML table built from tab-separated input lines.

        The directive line supplies the caption; its leading table name
        (the table marker followed by digits / capital letters) is the key
        under which the generated HTML is also stored in the "tables" index
        entry, where _call_tables() looks it up.

        Rows are read from self.input_iter until a blank line or a footnote
        line (starting with the footnote marker) is met.  Within a row:
          - a leading header marker turns every cell of the row into <th>;
          - a cell written as "..." (in double quotes) becomes <th>;
          - an empty cell in the first column extends the rowspan of the
            nearest non-empty cell above it;
          - an empty cell in any other column extends the colspan of the
            nearest non-empty cell to its left;
          - a cell holding only the down-arrow character extends the
            rowspan of the nearest non-empty cell above it.

        @param line: the table directive line
        @type line: string
        """
        str_title = ""
        self._table_buf1 = ""

        try:
            str_title = re.search(ur"^☆(表.*)$", line).group(1)
            fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
        except AttributeError:
            # re.search() returned None: no usable title on the line.
            str_title = ""
            fig_name = ""
        if str_title.find(u"表*") == 0:
            str_title = str_title.replace(u"表*", "")

        # The table is printed and, in parallel, collected in
        # self._table_buf1 for the index.
        print self._table_start(str_title)
        self._table_buf1 =  self._table_start(str_title)

        # First pass: parse the rows into table_contents, a list of rows,
        # each a list of dicts with the keys "tag", "item", "row"
        # (rowspan) and "col" (colspan).
        num_row = 0
        table_contents = []
        footnote = ""
        for line in self.input_iter:
            line = line.strip(" \n")
            line = self._default_markup_rule(line)
            if re.search(ur"^\s*$", line):
                # blank line: end of table
                break
            if re.search(ur"^※", line):
                # footnote line: remember it and end the table
                footnote = re.search(ur"^(※.*)$", line).group(1)
                break
            if re.search(ur"^〓", line):
                line = re.sub(ur"^〓", "", line)
                tag_mode = "th"
            else:
                tag_mode = "td"
            table_contents.append([])
            num_col = 0
            for item in line.split("\t"):
                if item == "":
                    if num_col == 0:
                        # Empty first column: walk upwards to the nearest
                        # non-empty cell and grow its rowspan.
                        # NOTE(review): num_row-n may go negative, in which
                        # case the index wraps to the end of the list; the
                        # IndexError handler only covers walking off an
                        # existing row -- confirm this is harmless.
                        n = 1
                        try:
                            while table_contents[num_row-n][num_col]["item"] == "":
                                n += 1
                            table_contents[num_row-n][num_col]["row"] += 1
                        except IndexError:
                            pass
                    else:
                        # Empty cell elsewhere: walk left to the nearest
                        # non-empty cell of this row and grow its colspan.
                        n = 1
                        try:
                            while table_contents[num_row][num_col-n]["item"] == "":
                                n += 1
                            table_contents[num_row][num_col-n]["col"] += 1
                        except IndexError:
                            pass
                if item == u"↓":
                    # Down arrow: grow the rowspan of the nearest non-empty
                    # cell above; the cell itself then counts as empty.  If
                    # no such cell exists the arrow is kept as cell text.
                    n = 1
                    try:
                        while table_contents[num_row-n][num_col]["item"] == "":
                            n += 1
                        table_contents[num_row-n][num_col]["row"] += 1
                        item = ""
                    except IndexError:
                        pass

                # Empty cells are recorded too, so that column positions
                # stay aligned; they are skipped when the row is printed.
                if re.search(r'^".*"$', item):
                    item = re.search(r'^"(.*)"$', item).group(1)
                    table_contents[num_row].append({"tag":"th","item":item,"row":1,"col":1})
                else:
                    table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
                num_col = num_col + 1
            num_row = num_row + 1

        # Second pass: emit one <tr> per parsed row.
        for row_item in table_contents:
            line = "<tr>"
            for item in row_item:
                if item["item"] == "":
                    continue
                line = line + "<" + item["tag"]
                if not item["row"] == 1:
                    line = line + (' rowspan="%s"' % item["row"])
                if not item["col"] == 1:
                    line = line + (' colspan="%s"' % item["col"])
                line = line +  ">"
                line = line + item["item"]
                line = line + "</" + item["tag"] + ">"
            line = line + "</tr>\n"
            print line,
            self._table_buf1 = self._table_buf1 + line

        # The printed table carries the footnote; the copy kept in the
        # index is closed without it.
        print self._table_end(footnote)
        self._table_buf1 =  self._table_buf1 + self._table_end()
        if self.index_haskey("tables"):
            self.index("tables")[fig_name] = self._table_buf1
        else:
            self.index_add("tables", {fig_name:self._table_buf1})
724
725     def _call_tables(self, line):
726         try:
727             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
728         except AttributeError:
729             return
730         print self.index("tables")[fig_name]