1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 from HTMLParserEx import HTMLParserEx
# BareHTMLParser: an HTMLParserEx subclass that accumulates text in
# self.buffer and hands it to a caller-supplied to_out_func together with
# the current bold flag and link target (see the to_out_func call in to_out).
# NOTE(review): several numbered lines, including the closing quotes of the
# docstring below, are absent from this copy -- restore them from the
# original file before relying on this text.
22 class BareHTMLParser(HTMLParserEx):
23 """Parses html by the minimal necessity
25 to_out_func format is:
26 def some_func(untied_data, is_bold, href):
27 where untied_data is non markuped string
28 and is_bold is whether untied_data is bold or not
29 and href is url anchor if exists
31 on_new_line format is:
34 Spaces at the head and end of each line are stripped, except that the head of the first line cannot be stripped.
# Constructor: initialises the HTMLParserEx base and stores both callbacks.
#   to_out_func -- later invoked as to_out_func(buffer, bold, href).
#   on_new_line -- stored on the instance; its call site is not visible here.
# NOTE(review): self.buffer, self.bold and self.href are read elsewhere in
# the class but are not assigned in the lines visible here -- confirm where
# they are initialised.
37 def __init__(self, to_out_func, on_new_line):
38 HTMLParserEx.__init__(self)
39 self.to_out_func = to_out_func
40 self.on_new_line = on_new_line
# Swap in a different output callback: text emitted afterwards is delivered
# to the new to_out_func. The on_new_line callback is left untouched.
45 def reset_func(self, to_out_func):
47 self.to_out_func = to_out_func
# Accept a piece of text for output.
# Visible steps: leading spaces are stripped from data, data is appended to
# self.buffer, the buffer is handed to to_out_func with the bold flag and
# href, and trailing spaces are stripped from the buffer.
# NOTE(review): the lines between these statements are absent from this
# copy, so the conditions under which each step runs (and whether all of
# them belong to to_out) cannot be determined here -- confirm against the
# original file.
49 def to_out(self, data):
52 data = data.lstrip(" ")
53 self.buffer = self.buffer + data
57 self.to_out_func(self.buffer, self.bold, self.href)
61 self.buffer = self.buffer.rstrip(" ")
# NOTE(review): presumably the body of a close() override whose def line is
# not visible here; it delegates to the base class -- confirm.
68 HTMLParserEx.close(self)
# Start-tag callback taking the tag and its attr value.
# NOTE(review): the body is not visible in this copy. Presumably this is the
# standard HTMLParser hook and the place where self.bold / self.href are
# set -- confirm against the original file.
73 def handle_starttag(self, tag, attr):
# End-tag callback taking the tag.
# NOTE(review): the body is not visible in this copy. Presumably this is the
# standard HTMLParser hook and the counterpart that clears state set by
# handle_starttag -- confirm against the original file.
85 def handle_endtag(self, tag):
# Text-data callback taking the raw data.
# NOTE(review): the body is not visible in this copy. Presumably it forwards
# data to to_out, as the reference handlers in this class do -- confirm.
93 def handle_data(self, data):
# Numeric character reference callback (presumably ref is the text between
# "&#" and ";", as in the standard HTMLParser hook -- confirm).
# The reference text is parsed as a base-10 integer and converted to the
# corresponding unicode character.
# NOTE(review): int(ref) raises ValueError for hexadecimal references such
# as "x41"; whether that is handled, and how data is emitted, is in lines
# not visible here -- confirm.
96 def handle_charref(self, ref):
99 data = unichr(int(ref))
def handle_entityref(self, name):
    """Emit the text for a named entity reference.

    name is the entity name as passed by the parser (for example "amp").
    A name listed in htmlentitydefs.name2codepoint is emitted through
    to_out() as the corresponding unicode character. Any other name is
    passed through unchanged, re-wrapped as "&" + name + ";", so that
    unknown references are not lost.
    """
    # Single table lookup; None marks a name the table does not know.
    codepoint = htmlentitydefs.name2codepoint.get(name)
    if codepoint is None:
        # Unknown entity: keep the literal reference text.
        self.to_out("&" + name + ";")
    else:
        self.to_out(unichr(codepoint))