markupper.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import sys
   5 import os
   6 import re
   7 import codecs
   8 import pickle
   9
  10 import HTMLTagFilter
  11 import deterfile
  12
  13 #sys.stdin = codecs.getreader('utf_8')(sys.stdin)
  14 #sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
  15
  16 alist = ["a", "a:href", "a:name", "b", "br" ]
  17 dlist = ["*"]
  18
  19 tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
  20 path_to_index = "./_markup_index"
  21
  22 class _InputStream(object):
  23     """InputStream base class."""
  24     def __init__(self):
  25         pass
  26
  27     def __iter__(self):
  28         """return Iterator"""
  29         return self
  30
  31     def next(self):
  32         """function for iterator"""
  33         pass
  34
  35
  36 class Markupper(object):
  37     """
  38     """
  39     def __init__(self):
  40         self._input_iter = None
  41         self._index_past = {}
  42         self._index = {}
  43         self._image_dir = ""
  44
  45     def index_add(self, key, val):
  46         """
  47         Add key and value to index.
  48
  49         @param key:
  50 　        @type key:
  51
  52         @param val:
  53         @key val:
  54         """
  55         self._index[key] = val
  56
  57     def index(self, key):
  58         """
  59         Get index
  60         """
  61         return self._index[key]
  62
  63     def index_haskey(self, key):
  64         return self._index.has_key(key)
  65
  66     def markup(self, input_iter, release="0"):
  67         """
  68         Do markup.
  69
  70         @param input_iter: iterator to use as input
  71         @type input_iter: iterator
  72         """
  73         self.input_iter = input_iter
  74         self._page_counter = 1
  75         # alist = ["a", "a:href", "a:name", "b", "br" ]
  76         # dlist = ["*"]
  77         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
  78         self._release = release
  79
  80         self.index_add("figs", [])
  81
  82         self._anchor = ""
  83         for line in self.input_iter:
  84             # line = self._default_markup_rule(line)
  85             # head-of-line rules
  86             if re.search(ur"^☆{{{$", line):
  87                 self._inline(line)
  88                 continue
  89             elif re.search(ur"^☆image_dir:", line):
  90                 self._image_dir = re.search(ur"^☆image_dir:\s*(.*)$", line).group(1)
  91                 continue
  92             elif re.search(ur"^☆comment\s{{{$", line):
  93                 self._comment(line)
  94                 continue
  95             elif re.search(ur"^☆\*", line):
  96                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
  97                 continue
  98             elif re.search(ur"^☆clear\s+", line):
  99                 self._clear(line)
 100                 continue
 101             elif re.search(ur"^・", line):
 102                 self._ulist(line)
 103                 continue
 104             elif re.search(ur"^[0-9]\.", line):
 105                 self._olist(line)
 106                 continue
 107             elif re.search(ur"^☆begin-column:", line):
 108                 self._begin_column(line)
 109                 continue
 110             elif re.search(ur"^☆end-column", line):
 111                 self._end_column(line)
 112                 continue
 113             elif re.search(ur"^☆space", line):
 114                 self._space(line)
 115                 continue
 116             elif re.search(ur"^☆call_tables", line):
 117                 self._call_tables(line)
 118                 continue
 119             elif re.search(ur"^●", line):
 120                 self._head_l(line)
 121                 continue
 122             elif re.search(ur"^○", line):
 123                 self._head_m(line)
 124                 continue
 125             elif re.search(ur"^☆----", line):
 126                 self._newpage(line)
 127                 continue
 128             elif re.search(ur"^☆\+---", line):
 129                 self._code(line)
 130                 continue
 131             elif re.search(ur"^☆表", line):
 132                 self._table(line)
 133                 continue
 134             elif re.search(ur"^☆図", line):
 135                 self._fig(line)
 136                 continue
 137             elif re.search(ur"^☆リスト", line):
 138                 self._list(line)
 139                 continue
 140             elif re.search(ur"^☆flow", line):
 141                 self._flow(line)
 142                 continue
 143
 144             if re.search(ur"^　", line):
 145                 self._paragraph(line)
 146                 continue
 147
 148
 149             if re.search(r"^\s*$", line):
 150                 line = ""
 151
 152             line = line.strip()
 153             print line
 154
 155         # end-of-loop
 156
 157     def _clear(self, line):
 158         print """<div style="clear:left;"> </div>
 159 """
 160
 161     def _head_l(self, line):
 162         line = line.rstrip()
 163         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 164             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 165             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 166
 167         line = self._default_markup_rule(line)
 168         if self._anchor != "":
 169             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
 170             self._anchor = ""
 171         else:
 172             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
 173         print line
 174
 175     def _head_m(self, line):
 176         line = line.rstrip()
 177         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 178             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 179             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 180
 181         line = self._default_markup_rule(line)
 182         if self._anchor != "":
 183             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
 184             self._anchor = ""
 185         else:
 186             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
 187         print line
 188
 189     def _paragraph(self, line):
 190         line = self._default_markup_rule(line)
 191         line = "<p>" + line + "</p>"
 192         print line
 193
 194     def _newpage(self, line):
 195         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
 196         print line
 197
 198     def load_index(self, path_to_index):
 199         """
 200         load index database.
 201
 202         @param path_to_index: index db's path
 203         @type path_to_index: string
 204         """
 205         # load index
 206         try:
 207             index_file = open(path_to_index, "r")
 208             self._index_past = pickle.load(index_file)
 209             index_file.close()
 210         except IOError:
 211             sys.stderr.write("warn: cannot read index file,\n")
 212
 213     def save_index(self, path_to_index):
 214         """
 215         save index database.
 216
 217         @param path_to_index: index db's path
 218         @type path_to_index: string
 219         """
 220         # save index
 221         try:
 222             index_file = open(path_to_index, "w")
 223             pickle.dump(self._index, index_file)
 224             index_file.close()
 225         except IOError:
 226             sys.stderr.write("warn: cannot write index file,\n")
 227
 228     def make_hashlist(self, path_to_hashfile):
 229         """
 230         create hash list.
 231
 232         @param path_to_hashfile: hashfile's path
 233         @type path_to_hashfile: string
 234         """
 235         try:
 236             file_img_hash = open(path_to_hashfile, "r")
 237         except IOError:
 238             sys.stderr.write("cannot open file: %s" % path_img_hash)
 239             return None;
 240
 241         self.hashlist = {};
 242         for line in file_img_hash:
 243             splited = line.strip().split("\t", 2)
 244             # hashlist's format: <hash> \t <filename>
 245             self.hashlist[splited[1]] = splited[0]
 246
 247     def _call_tagles(self):
 248         pass
 249
 250     def _escape(self, line):
 251         line = re.sub(ur"&", ur"&amp", line)
 252         line = re.sub(ur"<", ur"&lt;", line)
 253         line = re.sub(ur">", ur"&gt;", line)
 254         return line
 255
 256     def _default_markup_rule(self, line):
 257         """
 258         apply default markup rules.
 259
 260         @param line: string to apply markup
 261         @type line: string
 262         """
 263         line = self._escape(line)
 264
 265         # apply filter
 266         # line = tag_filter.apply(line)
 267
 268         line = re.sub(ur"[★*](表[0-9～、]+)", ur"<b>\1</b>", line)
 269         line = re.sub(ur"[★*](図[0-9～、]+)", ur"<b>\1</b>", line)
 270         line = re.sub(ur"[★*](リスト[0-9～、]+)", ur"<b>\1</b>", line)
 271         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
 272         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
 273         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
 274         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
 275
 276         # comment
 277         if re.search(ur"^☆#", line):
 278             line = ""
 279
 280         return line
 281
 282
 283     def _ulist(self, line):
 284         """Proccess ul"""
 285         print "<ul>"
 286         while re.search(ur"^・", line):
 287             line = self._default_markup_rule(line)
 288             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
 289             line = self.input_iter.next()
 290         print "</ul>\n"
 291
 292     def _olist(self, line):
 293         """Proccess ul"""
 294         print "<ol>"
 295         while re.search(ur"^[0-9]+\.", line):
 296             line = self._default_markup_rule(line)
 297             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
 298             line = self.input_iter.next()
 299         print "</ol>\n"
 300
 301
 302
 303     def _begin_column(self, line):
 304         """Proccess column"""
 305         try:
 306             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
 307         except AttributeError:
 308             str_title = ""
 309
 310         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
 311     <tr><th>%s</th></tr>
 312     <tr><td><span style="font-size: 85%%;">
 313     """ % (str_title)
 314         print html
 315
 316     def _end_column(self, line):
 317         print """</span></td></tr>
 318     </table>
 319     """
 320
 321     def _list_start(self):
 322         return "<pre>"
 323
 324     def _list_end(self):
 325         return "</pre>"
 326
 327     def _list(self, line):
 328         try:
 329             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
 330         except AttributeError:
 331             str_title = ""
 332         print "<p><b>%s</b></p>" % (str_title)
 333         print self._list_start()
 334
 335         for line in self.input_iter:
 336             line = line.strip("\n\r")
 337             line = self._escape(line)
 338             if re.search(ur"""^☆\+---""", line):
 339                 break
 340             print line
 341         print self._list_end()
 342
 343     def _code(self, line):
 344         print self._list_start()
 345
 346         for line in self.input_iter:
 347             line = self._escape(line)
 348             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 349             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
 350
 351             if re.search(ur"^☆\+---$", line):
 352                 break
 353             print line,
 354         print self._list_end()
 355
 356     def _inline(self, line):
 357         for line in self.input_iter:
 358             #        line = line.strip()
 359             if re.search(ur"^☆}}}", line):
 360                 break
 361             print line
 362
 363     def _comment(self, line):
 364         for line in self.input_iter:
 365             line = line.strip()
 366             if re.search(ur"^☆}}}", line):
 367                 break
 368
 369     def _space(self, line):
 370         print "<br><br>"
 371
 372     def _flow(self, line):
 373         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
 374         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
 375 """
 376         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
 377 %s
 378 </div>"""
 379         flow_footer = """</div>
 380 """
 381         flow_item = """<div>
 382 %s
 383 <p>%s</p>
 384 </div>
 385
 386 """
 387         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
 388
 389         rex_title = re.compile(ur"^☆flow\s+(.*)$")
 390         if rex_title.search(line):
 391             title = rex_title.search(line).group(1)
 392         else:
 393             title = ""
 394
 395         rex_file = re.compile(ur"^([^:]*):(.*)$")
 396         outputs = []
 397         for line in self.input_iter:
 398             if re.search(r"^\s*$", line):
 399                 break
 400             match = rex_file.search(line)
 401             if match:
 402                 file = os.path.join(self._image_dir, match.group(1))
 403                 cap = self._default_markup_rule(match.group(2))
 404             else:
 405                 continue
 406             fig = self._anchored_fig(file, cap)
 407             outputs.append(flow_item % (fig, cap))
 408
 409         print flow_header
 410         print flow_title % (title,)
 411         print arrow.join(outputs)
 412         print flow_footer
 413
 414
 415     def _fig_start(self, cap="", styles=[], width=0, height=0):
 416         params = dict(style="", tablewidth="")
 417         if width != 0:
 418             params["style"] = "width:%d;" % (width,)
 419             params["tablewidth"] = 'width="%d"' % (width,)
 420
 421         if "lfloat" in styles:
 422             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
 423     <tr> <td valign="top" align="center">
 424     """ % params
 425         elif "left" in styles:
 426             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
 427     <tr> <td valign="top" align="center">
 428     """ %params
 429         else:
 430             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
 431     <tr> <td valign="top" align="center">
 432     """ % params
 433
 434     def _fig_end(self, cap="", styles=[]):
 435         return """</td> </tr>
 436     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 437     %s
 438     </span></td> </tr>
 439     </table>
 440     """ % (cap)
 441
    # NOTE(review): this definition is shadowed by the second `_fig` defined
    # further down in this class, so this body is never the one that runs.
    # If it were, the else-branch would call itself without end.  Also note
    # that it compares against the integer 1 while markup()'s default
    # release value is the string "0".  It looks like an unfinished
    # dispatcher between _fig_release and the regular figure handler --
    # confirm the intent before removing or renaming it.
    def _fig(self, line):
        if self._release == 1:
            self._fig_release(line)
        else:
            self._fig(line)
 447
 448     def _get_png_geom(self, filepath):
 449         desc = deterfile.file(filepath)
 450         try:
 451             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
 452         except IndexError:
 453             err = ",".join(desc)
 454             raise Exception("deterfile error: %s, file: %s" % (err,filepath))
 455         if m:
 456             w = m.group(1)
 457             h = m.group(2)
 458             return (int(w), int(h))
 459         else:
 460             return None
 461
 462     def _fig(self, line):
 463         try:
 464             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 465         except AttributeError:
 466             str_title = ""
 467         if str_title.find(u"図*") == 0:
 468             str_title = str_title.replace(u"図*", "")
 469         line = self.input_iter.next()
 470         styles = []
 471         if line.find("@") == 0:
 472             styles = line.strip().replace("@", "").split(",")
 473             line = self.input_iter.next()
 474
 475         imgname = ""
 476         imgname_s = ""
 477         hash = ""
 478         hash_s = ""
 479         match_o1 = re.search(ur"<([^,]*?)>", line)
 480         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 481         if not match_o1 == None:
 482             imgname = match_o1.group(1)
 483             imgname = os.path.join(self._image_dir, imgname)
 484             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
 485         elif not match_o2 == None:
 486             imgname = match_o2.group(1)
 487             imgname = os.path.join(self._image_dir, imgname)
 488             imgname_s = match_o2.group(2)
 489
 490         geom = self._get_png_geom(imgname_s)
 491         if geom:
 492             w = geom[0]
 493             h = geom[1]
 494             print self._fig_start("", styles, width=w, height=h)
 495         else:
 496             print self._fig_start("", styles)
 497         print self._anchored_fig(imgname, str_title, imgname_s)
 498         print self._fig_end(str_title, styles);
 499
 500         dic = self.index("figs")
 501         dic.append(imgname)
 502         if imgname_s != "":
 503             dic.append(imgname_s)
 504
 505
 506     def _anchored_fig(self, file, alt, file_s=""):
 507
 508         if file_s == "":
 509             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
 510
 511         if not os.path.isfile(file_s):
 512             file_s = file
 513
 514         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
 515
 516         return """<a href="%s">
 517   <img src="%s" alt="%s">
 518 </a>
 519 """ % (file, file_s, alt)
 520
 521
 522     def _fig_release(self, line):
 523         try:
 524             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 525         except AttributeError:
 526             str_title = ""
 527         print self._fig_start()
 528
 529         line = self.input_iter.next()
 530         imgname = ""
 531         imgname_s = ""
 532         hash = ""
 533         hash_s = ""
 534         match_o1 = re.search(ur"<([^,]*?)>", line)
 535         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 536         if not match_o1 == None:
 537             imgname = match_o1.group(1)
 538             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
 539         elif not match_o2 == None:
 540             imgname = match_o1.group(1)
 541             imgname_s = match_o1.group(2)
 542
 543
 544         hash = self.hashlist.get(imgname, "")
 545         hash_s = self.hashlist.get(imgname_s, "")
 546         if hash_s == "":
 547             hash_s = hash
 548
 549         print """<a href="/blob.pl?id=%s">
 550      <slash type="image" id="%s" title="%s">
 551      </a>
 552      """ % (hash, hash_s, str_title)
 553
 554
 555         dic = self.index("figs")
 556         dic.append(imgname)
 557         if imgname_s != "":
 558             dic.append(imgname_s)
 559
 560         print self._fig_end(str_title);
 561
 562
 563     def _table_start(self, cap):
 564         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table">
 565     <caption><b>%s</b></caption>
 566     """ % cap
 567
 568     def _table_end(self, footnote=""):
 569         return "</table>\n%s</div>\n" % (footnote,)
 570
 571     def _table(self, line):
 572         str_title = ""
 573         self._table_buf1 = ""
 574
 575         try:
 576             str_title = re.search(ur"^☆(表.*)$", line).group(1)
 577             fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
 578         except AttributeError:
 579             str_title = ""
 580             fig_name = ""
 581         if str_title.find(u"表*") == 0:
 582             str_title = str_title.replace(u"表*", "")
 583
 584         print self._table_start(str_title)
 585         self._table_buf1 =  self._table_start(str_title)
 586
 587         num_row = 0
 588         table_contents = []
 589         footnote = ""
 590         for line in self.input_iter:
 591             line = line.strip(" \n")
 592             line = self._default_markup_rule(line)
 593             if re.search(ur"^\s*$", line):
 594                 break
 595             if re.search(ur"^※", line):
 596                 footnote = re.search(ur"^(※.*)$", line).group(1)
 597                 break
 598             line = self._default_markup_rule(line)
 599             if re.search(ur"^〓", line):
 600                 line = re.sub(ur"^〓", "", line)
 601                 tag_mode = "th"
 602             else:
 603                 tag_mode = "td"
 604             table_contents.append([])
 605             num_col = 0
 606             for item in line.split("\t"):
 607                 if item == "":
 608                     if num_col == 0:
 609                         n = 1
 610                         try:
 611                             while table_contents[num_row-n][num_col]["item"] == "":
 612                                 n += 1
 613                             table_contents[num_row-n][num_col]["row"] += 1
 614                         except IndexError:
 615                             pass
 616                     else:
 617                         n = 1
 618                         try:
 619                             while table_contents[num_row][num_col-n]["item"] == "":
 620                                 n += 1
 621                             table_contents[num_row][num_col-n]["col"] += 1
 622                         except IndexError:
 623                             pass
 624
 625                 table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
 626                 num_col = num_col + 1
 627             num_row = num_row + 1
 628
 629         for row_item in table_contents:
 630             line = "<tr>"
 631             for item in row_item:
 632                 if item["item"] == "":
 633                     continue
 634                 line = line + "<" + item["tag"]
 635                 if not item["row"] == 1:
 636                     line = line + (' rowspan="%s"' % item["row"])
 637                 if not item["col"] == 1:
 638                     line = line + (' colspan="%s"' % item["col"])
 639                 line = line +  ">"
 640                 line = line + item["item"]
 641                 line = line + "</" + item["tag"] + ">"
 642             line = line + "</tr>\n"
 643             print line,
 644             self._table_buf1 = self._table_buf1 + line
 645
 646             # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
 647             # line = line.replace("\t", "</th><th>")
 648             # print line
 649             # else:
 650             # line = "<tr><td>" + line + "</td></tr>"
 651             # line = line.replace("\t", "</td><td>")
 652             # print line
 653
 654         print self._table_end(footnote)
 655         self._table_buf1 =  self._table_buf1 + self._table_end()
 656         if self.index_haskey("tables"):
 657             self.index("tables")[fig_name] = self._table_buf1
 658         else:
 659             self.index_add("tables", {fig_name:self._table_buf1})
 660
 661     def _call_tables(self, line):
 662         try:
 663             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
 664         except AttributeError:
 665             return
 666         print self.index("tables")[fig_name]