def on_response(self, response):
data = response.read()
info = response.info()
+
+ # Assume the response uses the same encoding as the dat files.
+ encoding = self.bbs_type.encoding
+ # If the response headers specify a charset, use it instead.
if "Content-Type" in info:
import re
match = re.search(
if match:
charset = match.group("charset").lower()
- if charset in ("x-sjis", "x_sjis", "sjis", "shiftjis", "shift-jis",
- "shift_jis", "s-jis", "s_jis"):
- encoding = "cp932"
- elif charset in ("euc-jp", "euc_jp", "eucjp"):
- encoding = "euc-jp"
-
- if encoding:
- data = data.decode(encoding, "replace")
+ if charset in ("x-sjis", "x_sjis", "sjis", "shiftjis",
+ "shift-jis", "shift_jis", "s-jis", "s_jis"):
+ encoding = "cp932"
+ elif charset in ("euc-jp", "euc_jp", "eucjp"):
+ encoding = "euc-jp"
+ data = data.decode(encoding, "replace")
p = ConfirmationHTMLParser()
p.feed(data)
p.close()