1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 from HTMLParserEx import HTMLParserEx
# BareHTMLParser: an HTMLParserEx subclass that passes markup-free text to a
# caller-supplied callback, invoked as to_out_func(text, is_bold, href).
# Text is accumulated in self.buffer before it is handed over. A chunk that is
# appended right after a newline loses its leading spaces, so leading spaces
# at the very start of the buffer (the first line) are never removed.
# NOTE(review): this excerpt has gaps - the embedded source line numbers skip
# and indentation is lost - so several statements and method headers are not
# visible. The comments below describe only the lines that are shown.
22 class BareHTMLParser(HTMLParserEx):
23 """Parses html by the minimal necessity
25 to_out_func format is:
26 def some_func(untied_data, is_bold, href):
27 where untied_data is non markuped string
28 and is_bold is whether untied_data is bold or not
29 and href is url anchor if exists
31 strip spaces at the head and end of line, but first line's head is unable.
# NOTE(review): the closing docstring delimiter is not visible in this excerpt.
# Initialise the base parser and remember the output callback.
# NOTE(review): self.buffer, self.bold and self.href are read further down,
# but their initialisation is not visible here - confirm it happens in
# __init__ (source lines 37-40 are missing from this excerpt).
34 def __init__(self, to_out_func):
35 HTMLParserEx.__init__(self)
36 self.to_out_func = to_out_func
# Replace the output callback with to_out_func.
# NOTE(review): source line 42 is not visible; it may hold an extra statement
# (for example emitting pending text to the old callback first) - confirm.
41 def reset_func(self, to_out_func):
43 self.to_out_func = to_out_func
# Append data to self.buffer. When the buffer currently ends with a newline,
# leading spaces are stripped from data before it is appended.
# NOTE(review): `n` is not assigned in the visible lines - presumably
# len(self.buffer) on the missing source line 46; confirm.
45 def to_out(self, data):
47 if n > 0 and self.buffer[n-1] == "\n":
48 data = data.lstrip(" ")
49 self.buffer = self.buffer + data
# Hands the buffered text, together with the current bold flag and href, to
# the output callback.
# NOTE(review): the enclosing def line and any guard or buffer reset around
# this call are not visible in this excerpt.
53 self.to_out_func(self.buffer, self.bold, self.href)
# Removes trailing spaces from the buffered text.
# NOTE(review): the enclosing method is not visible in this excerpt.
57 self.buffer = self.buffer.rstrip(" ")
# Delegates to the base class close().
# NOTE(review): the enclosing def line (presumably close) and whatever runs
# before this call are not visible in this excerpt.
63 HTMLParserEx.close(self)
# NOTE(review): only the def lines of handle_starttag, handle_endtag and
# handle_data are visible; their bodies are missing from this excerpt.
68 def handle_starttag(self, tag, attr):
80 def handle_endtag(self, tag):
88 def handle_data(self, data):
# Converts a numeric character reference to its character with
# unichr(int(ref)).
# NOTE(review): int(ref) raises ValueError for non-decimal text such as a hex
# reference ("x41"); the surrounding lines (92-93, 95-98) are not visible, so
# confirm whether that case is guarded and where `data` is emitted.
91 def handle_charref(self, ref):
94 data = unichr(int(ref))
# Emits a named entity: a name found in htmlentitydefs.name2codepoint is sent
# to to_out as the corresponding character; the last line sends the literal
# "&name;" text instead.
# NOTE(review): source line 103 between the two branches is not visible -
# presumably `else:`, making the literal form the fallback for unknown names.
99 def handle_entityref(self, name):
100 if name in htmlentitydefs.name2codepoint:
101 codepoint = htmlentitydefs.name2codepoint[name]
102 self.to_out(unichr(codepoint))
104 self.to_out("&"+name+";")