1 # Copyright (C) 2006 by Aiwota Programmer
2 # aiwotaprog@tetteke.tk
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 from HTMLParser import HTMLParser
22 class BareHTMLParser(HTMLParser):
23 """Parses html by the minimal necessity
25 to_out_func format is:
26 def some_func(untied_data, is_bold, href):
27 where untied_data is the text with the markup removed,
28 is_bold tells whether untied_data is bold,
29 and href is the anchor URL, if one exists
32 def __init__(self, to_out_func):
33 HTMLParser.__init__(self)
34 self.to_out_func = to_out_func
def reset_func(self, to_out_func):
    """Replace the output callback.

    After this call, to_out() forwards to to_out_func instead of the
    callback that was stored previously.
    """
    self.to_out_func = to_out_func
def to_out(self, data):
    """Forward data to the output callback.

    The callback stored in self.to_out_func is called with data
    followed by the current values of self.bold and self.href.
    """
    emit = self.to_out_func
    emit(data, self.bold, self.href)
44 # handle_* are overridden methods
46 def handle_starttag(self, tag, attr):
56 def handle_endtag(self, tag):
62 def handle_data(self, data):
65 def handle_charref(self, ref):
68 data = unichr(int(ref))
def handle_entityref(self, name):
    """Output the text for a named entity reference such as "&amp;".

    Names listed in htmlentitydefs.name2codepoint are converted to
    the corresponding unicode character.  Any other name is passed
    through unchanged in its "&name;" form.
    """
    # NOTE(review): the listing omits the line between the two
    # to_out() calls; it is assumed to be "else:" -- confirm against
    # the original file.
    codepoints = htmlentitydefs.name2codepoint
    if name not in codepoints:
        self.to_out("&"+name+";")
        return
    self.to_out(unichr(codepoints[name]))