3 import java.util.EnumSet;
5 import java.util.regex.Pattern;
7 import org.slf4j.Logger;
8 import org.slf4j.LoggerFactory;
9 import org.xml.sax.helpers.DefaultHandler;
10 import org.xml.sax.Attributes;
18 * Description: Saves Nico Nico Douga videos together with their comments
22 * 著作権: Copyright (c) 2007 PSI
// SAX handler (extends DefaultHandler) that reads "chat" elements and hands them to a Packet.
// NOTE(review): this view of the file is missing lines (the embedded numbering has gaps),
// so the comments below describe only what the visible lines establish.
32 public class NicoXMLReader extends DefaultHandler {
// Class-wide SLF4J logger.
34 private static final Logger logger = LoggerFactory.getLogger(NicoXMLReader.class);
// Comment types to process; startElement checks each chat's CommentType against this set.
35 private final Set<CommentType> processTypes;
// Receives chat items via addChat(item) in endElement.
36 private final Packet packet;
// Pattern built from the NG-word string; tested against the "mail" attribute and the comment text.
37 private final Pattern ngWord;
// Pattern built from the NG-ID string; tested against the "user_id" attribute.
38 private final Pattern ngId;
// Thread id used as the reference for deciding whether a chat is "optional";
// null until set (the assignment is not visible here - presumably the first thread id seen).
39 private String mainThreadId;
// NOTE(review): no visible line reads or writes this flag; presumably it marks the current
// item as rejected by a filter - confirm against the full file.
41 private boolean itemKicked;
45 * @param types The kinds of comments to process.
48 * @param ng_word NG (blocked) word expression.
// Builds a reader: copies the requested comment types and compiles the two NG filters.
// NOTE(review): EnumSet.copyOf throws IllegalArgumentException when given an empty
// collection that is not itself an EnumSet - confirm callers never pass an empty plain Set.
50 public NicoXMLReader(Set<CommentType> types, Packet packet, String ng_id, String ng_word) {
// Defensive copy, so later changes to the caller's set do not affect this reader.
51 this.processTypes = EnumSet.copyOf(types);
// (Embedded line 52 is not visible here; presumably it stores the packet argument - confirm.)
// Either pattern may end up unset for null/empty input; match() tolerates a null pattern.
53 this.ngWord = makePattern(ng_word);
54 this.ngId = makePattern(ng_id);
// Turns a space-separated filter string into one compiled Pattern.
// Three token forms are recognised: /regex/, "quoted literal", and a bare word.
// NOTE(review): several lines of this method are not visible here (guard body, index
// declarations, loop exits, how the groups are joined); comments cover visible lines only.
57 private static Pattern makePattern(String word) {
// Guard for null/empty input; its body is not visible (presumably returns without a pattern).
58 if (word == null || word.length() <= 0) {
// Split on single spaces; tmp2 collects the re-joined tokens (never more than tmp.length).
61 final String tmp[] = word.split(" ");
62 String tmp2[] = new String[tmp.length];
// index counts produced tokens, tmp_index walks the raw pieces (declarations not visible here).
65 for (index = 0; index < tmp.length && tmp_index < tmp.length; index++) {
// A piece starting with "/" opens a regex token; later pieces are re-attached with the
// single space that split() removed.
// NOTE(review): the opening piece itself is never checked for a trailing "/", so a
// self-contained piece such as "/abc/" appears to keep absorbing following pieces - confirm.
66 if (tmp[tmp_index].startsWith("/")) {
67 final StringBuilder str = new StringBuilder(tmp[tmp_index]);
68 for (tmp_index++; tmp_index < tmp.length; tmp_index++) {
69 str.append(" ").append(tmp[tmp_index]);
// A piece ending with "/" presumably closes the token (branch body not visible).
70 if (tmp[tmp_index].endsWith("/")) {
75 tmp2[index] = str.toString();
// Same re-joining for a token opened by a double quote.
76 } else if (tmp[tmp_index].startsWith("\"")) {
77 final StringBuilder str = new StringBuilder(tmp[tmp_index]);
78 for (tmp_index++; tmp_index < tmp.length; tmp_index++) {
79 str.append(" ").append(tmp[tmp_index]);
80 if (tmp[tmp_index].endsWith("\"")) {
85 tmp2[index] = str.toString();
// Plain piece: taken as-is.
87 tmp2[index] = tmp[tmp_index];
// Trim the token array to the number of tokens actually produced.
91 final String elt[] = new String[index];
92 System.arraycopy(tmp2, 0, elt, 0, index);
// Build one regex group per token.
93 final StringBuilder reg = new StringBuilder();
94 for (int i = 0; i < elt.length; i++) {
95 final String e = elt[i];
// "/.../" token: the text between the slashes is inserted as a raw regex group.
100 if (e.indexOf('/') == 0 && e.lastIndexOf('/') == e.length() - 1) {
101 reg.append("(").append(e.substring(1, e.length() - 1)).append(")");
// "\"...\"" token: the text between the quotes is inserted as a quoted (literal) group.
102 } else if (e.indexOf('\"') == 0
103 && e.lastIndexOf('\"') == e.length() - 1) {
104 reg.append("(").append(Pattern.quote(e.substring(1, e.length() - 1))).append(")");
// Bare word: literal match anywhere in the input; the surrounding .* is needed because
// match() uses a full-string matches().
106 reg.append("(.*(").append(Pattern.quote(e)).append(")+.*)");
109 logger.debug("reg:" + reg);
110 return Pattern.compile(reg.toString());
// Tests a word against a filter pattern using a full-string match (Matcher.matches()).
113 private static boolean match(Pattern pat, String word) {
// Guard for a null/empty word or a missing pattern; its body is not visible here
// (presumably returns false).
114 if (word == null || word.length() <= 0 || pat == null) {
117 return pat.matcher(word).matches();
// SAX callback at the start of the document; logs a debug message.
124 public void startDocument() {
125 logger.debug("Start converting to interval file.");
// SAX callback for an opening tag. For a "chat" element (name compared case-insensitively)
// it classifies the comment, applies the filters and copies attributes onto the current item.
// NOTE(review): the bodies of the filter branches are not visible here, so what happens
// when a filter hits is not documented.
140 public void startElement(final String uri, final String localName, final String qName,
141 final Attributes attributes) {
142 if (qName.equalsIgnoreCase("chat")) {
146 // Whether to process regular comments or owner (uploader) comments
147 final String fork = attributes.getValue("fork");
// fork="1" marks an owner comment; a missing attribute counts as not-owner.
148 final boolean isOwner = "1".equals(fork);
149 final String th = attributes.getValue("thread");
151 // Treat the first thread found as the main (non-optional) thread
152 if (mainThreadId == null) {
// Optional = a main thread id is known and this chat belongs to a different thread.
156 final boolean isOptional = (mainThreadId == null) ? false : !mainThreadId.equals(th);
// Chats whose type was not requested in the constructor take this branch (body not visible).
158 if(!processTypes.contains(CommentType.valueOf(isOwner, isOptional))){
// deleted="1" marks a deleted chat (branch body not visible).
164 final String deleted = attributes.getValue("deleted");
165 if (deleted != null && deleted.equalsIgnoreCase("1")) {
169 item.setDate(attributes.getValue("date"));
// The "mail" attribute is checked against the NG-word pattern.
170 final String mail = attributes.getValue("mail");
171 if (match(ngWord, mail)) {
176 item.setNo(attributes.getValue("no"));
// The poster id is checked against the NG-ID pattern before being stored.
177 final String user_id = attributes.getValue("user_id");
178 if (match(ngId, user_id)) {
182 item.setUserID(user_id);
183 item.setVpos(attributes.getValue("vpos"));
// SAX callback for character data: copies the reported slice, scans it for undefined
// characters, checks the text against the NG-word pattern and stores it on the current item.
// NOTE(review): a SAX parser may deliver one element's text in several chunks; if
// setComment overwrites rather than appends, only the last chunk would be kept - confirm.
198 public void characters(final char[] ch, final int offset, final int length) {
// Work on a private copy so the parser's buffer is never modified.
199 final char input[] = (new String(ch, offset, length)).toCharArray();
200 for (int i = 0; i < input.length; i++) {
// Characters that are not defined in Unicode take this branch (body not visible here).
201 if (!Character.isDefined(input[i])) {
206 final String com = new String(input);
// NG-word hit on the comment text (branch body not visible here).
207 if (match(ngWord, com)) {
211 item.setComment(com);
// SAX callback for a closing tag: on the end of a "chat" element (case-insensitive)
// the current item is handed to the packet.
// NOTE(review): embedded line 227 is not visible; there may be a condition around addChat.
225 public void endElement(final String uri, final String localName, final String qName) {
226 if (qName.equalsIgnoreCase("chat")) {
228 packet.addChat(item);
// SAX callback at the end of the document; logs a debug message.
238 public void endDocument() {
239 logger.debug("Converting finished.");