3 import java.util.EnumSet;
5 import java.util.regex.Pattern;
7 import org.slf4j.Logger;
8 import org.slf4j.LoggerFactory;
9 import org.xml.sax.helpers.DefaultHandler;
10 import org.xml.sax.Attributes;
18 * 説明: ニコニコ動画の動画をコメントつきで保存
22 * 著作権: Copyright (c) 2007 PSI
32 public class NicoXMLReader extends DefaultHandler {
// Shared SLF4J logger for this class.
34 private static final Logger logger = LoggerFactory.getLogger(NicoXMLReader.class);
// Comment categories this reader handles; startElement tests it for OWNER and NORMAL.
35 private final Set<ProcessType> processTypes;
// Destination for parsed chat items: endElement passes the current item to packet.addChat().
36 private final Packet packet;
// NG-word filter built by makePattern(ng_word); applied to the "mail" attribute and to character data.
37 private final Pattern ngWord;
// NG-ID filter built by makePattern(ng_id); applied to the "user_id" attribute.
38 private final Pattern ngId;
// NOTE(review): presumably flags the current chat item as rejected by a filter --
// its reads and writes are not visible in this excerpt; confirm.
40 private boolean itemKicked;
/**
 * Categories of comments the reader can be asked to process. The constants
 * referenced elsewhere in this class are OWNER and NORMAL; the declaration
 * body itself is outside this excerpt.
 */
42 public enum ProcessType {
/**
 * Creates a reader for the given comment categories and NG filters.
 *
 * @param types   comment categories to process; copied with EnumSet.copyOf, so later
 *                changes to the caller's set do not affect this reader. Per the
 *                EnumSet.copyOf contract, an empty collection that is not itself an
 *                EnumSet is rejected with IllegalArgumentException.
 * @param packet  destination for parsed chat items (its assignment is not visible in
 *                this excerpt -- presumably stored in the packet field; confirm)
 * @param ng_id   space-separated NG user-ID expressions, compiled by makePattern
 * @param ng_word space-separated NG word expressions, compiled by makePattern
 */
47 public NicoXMLReader(Set<ProcessType> types, Packet packet, String ng_id, String ng_word) {
48 this.processTypes = EnumSet.copyOf(types);
50 this.ngWord = makePattern(ng_word);
51 this.ngId = makePattern(ng_id);
/**
 * Compiles a space-separated list of NG expressions into a single regular expression.
 *
 * Three token forms are recognised:
 *   /regex/   - the text between the slashes is inserted into the pattern as a raw regex;
 *   "literal" - the text between the quotes is inserted as a quoted literal;
 *   other     - the token is quoted and wrapped in ".*( )+.*", i.e. a "contains" test.
 * Tokens that open with "/" or a double quote are re-joined (with single spaces) with
 * the tokens that follow them, so such an expression may itself contain spaces.
 *
 * @param word the raw NG list; a null or empty value takes the early-exit branch
 *             (its body is not visible in this excerpt -- presumably returns null; confirm)
 * @return the compiled pattern
 */
54 private static Pattern makePattern(String word) {
55 if (word == null || word.length() <= 0) {
// Split on single spaces; consecutive spaces therefore yield empty tokens.
58 final String tmp[] = word.split(" ");
// Holds the re-joined tokens; only the first `index` slots end up being used.
59 String tmp2[] = new String[tmp.length];
// `index` walks the output slots, `tmp_index` walks the raw tokens (declarations not visible here).
62 for (index = 0; index < tmp.length && tmp_index < tmp.length; index++) {
63 if (tmp[tmp_index].startsWith("/")) {
// Regex form: append following tokens, testing each appended token for a trailing "/".
// NOTE(review): the opening token itself is never tested for a trailing "/", so a
// single-token "/abc/" followed by more tokens appears to absorb them -- confirm.
64 final StringBuilder str = new StringBuilder(tmp[tmp_index]);
65 for (tmp_index++; tmp_index < tmp.length; tmp_index++) {
66 str.append(" ").append(tmp[tmp_index]);
67 if (tmp[tmp_index].endsWith("/")) {
72 tmp2[index] = str.toString();
73 } else if (tmp[tmp_index].startsWith("\"")) {
// Quoted-literal form: same joining scheme, terminated by a token ending in a double quote.
74 final StringBuilder str = new StringBuilder(tmp[tmp_index]);
75 for (tmp_index++; tmp_index < tmp.length; tmp_index++) {
76 str.append(" ").append(tmp[tmp_index]);
77 if (tmp[tmp_index].endsWith("\"")) {
82 tmp2[index] = str.toString();
// Plain token: copied through unchanged.
84 tmp2[index] = tmp[tmp_index];
// Trim the working array down to the slots that were actually filled.
88 final String elt[] = new String[index];
89 System.arraycopy(tmp2, 0, elt, 0, index);
90 final StringBuilder reg = new StringBuilder();
// Emit one parenthesised group per expression.
// NOTE(review): how the groups are separated (presumably "|") is not visible in this excerpt.
91 for (int i = 0; i < elt.length; i++) {
92 final String e = elt[i];
// Starts and ends with "/": strip the delimiters and use the inside as a raw regex.
// NOTE(review): a one-character token "/" satisfies both tests and would reach
// substring(1, 0), which throws StringIndexOutOfBoundsException -- confirm whether
// the lines missing from this view guard against it. The same applies to a lone quote below.
97 if (e.indexOf('/') == 0 && e.lastIndexOf('/') == e.length() - 1) {
98 reg.append("(").append(e.substring(1, e.length() - 1)).append(")");
// Starts and ends with a double quote: strip the quotes and match the inside literally.
99 } else if (e.indexOf('\"') == 0
100 && e.lastIndexOf('\"') == e.length() - 1) {
101 reg.append("(").append(Pattern.quote(e.substring(1, e.length() - 1))).append(")");
// Anything else: literal text allowed to appear anywhere in the input.
103 reg.append("(.*(").append(Pattern.quote(e)).append(")+.*)");
106 logger.debug("reg:" + reg);
// NOTE(review): a malformed /regex/ token makes Pattern.compile throw
// PatternSyntaxException; no handling is visible in this excerpt.
107 return Pattern.compile(reg.toString());
/**
 * Tests whether the whole of {@code word} matches {@code pat}.
 *
 * @param pat  pattern to apply; may be null
 * @param word text to test; may be null or empty
 * @return the result of Matcher.matches() (a full-string match, not a substring search)
 *         when both arguments are usable
 */
110 private static boolean match(Pattern pat, String word) {
// Guard for a missing word or pattern; the guarded statement is not visible in
// this excerpt -- presumably "return false"; confirm.
111 if (word == null || word.length() <= 0 || pat == null) {
114 return pat.matcher(word).matches();
/**
 * SAX callback for the start of the document; logs a debug message marking the
 * beginning of the conversion.
 */
121 public void startDocument() {
122 logger.debug("Start converting to interval file.");
/**
 * SAX callback for an opening tag. For "chat" elements (name compared
 * case-insensitively) it reads the element's attributes, applies the
 * category, deletion and NG filters, and copies date, no, user_id and vpos
 * onto the current item.
 *
 * @param uri        namespace URI (not used in the visible lines)
 * @param localName  local name (not used in the visible lines)
 * @param qName      qualified element name; only "chat" is handled here
 * @param attributes attributes of the element
 */
137 public void startElement(final String uri, final String localName, final String qName,
138 final Attributes attributes) {
139 if (qName.equalsIgnoreCase("chat")) {
// Decide whether to process normal comments or owner (uploader) comments.
144 final String fork = attributes.getValue("fork");
// fork="1" marks an owner comment; a missing or different value means a normal one.
145 final boolean isOwner = "1".equals(fork);
// Category filter against the configured process types. The branch on isOwner and the
// bodies of these checks are not visible in this excerpt -- presumably the item is
// rejected when its category is not enabled; confirm.
147 if (!processTypes.contains(ProcessType.OWNER)) {
151 if (!processTypes.contains(ProcessType.NORMAL)) {
// deleted="1" is tested next (the body of the check is not visible in this excerpt).
157 final String deleted = attributes.getValue("deleted");
158 if (deleted != null && deleted.equalsIgnoreCase("1")) {
162 item.setDate(attributes.getValue("date"));
// The "mail" attribute is tested against the NG-word pattern.
163 final String mail = attributes.getValue("mail");
164 if (match(ngWord, mail)) {
169 item.setNo(attributes.getValue("no"));
// The poster's ID is tested against the NG-ID pattern before being stored.
170 final String user_id = attributes.getValue("user_id");
171 if (match(ngId, user_id)) {
175 item.setUserID(user_id);
176 item.setVpos(attributes.getValue("vpos"));
/**
 * SAX callback for character data. Copies the reported slice, scans it for
 * undefined characters, tests the resulting text against the NG-word pattern
 * and stores it as the comment of the current item.
 *
 * NOTE(review): SAX parsers may deliver one text node in several characters()
 * calls; the visible lines call setComment with the chunk of the current call
 * only -- confirm whether chunks are accumulated elsewhere.
 *
 * @param ch     buffer holding the characters
 * @param offset start of the slice within {@code ch}
 * @param length number of characters in the slice
 */
191 public void characters(final char[] ch, final int offset, final int length) {
// Work on a private copy of the slice so the parser's buffer is left untouched.
192 final char input[] = (new String(ch, offset, length)).toCharArray();
193 for (int i = 0; i < input.length; i++) {
// Characters with no Unicode definition are singled out here; what is done with them
// is not visible in this excerpt (presumably they are replaced; confirm).
194 if (!Character.isDefined(input[i])) {
199 final String com = new String(input);
// The text is tested against the NG-word pattern (the body of the check is not visible).
200 if (match(ngWord, com)) {
204 item.setComment(com);
/**
 * SAX callback for a closing tag. When a "chat" element ends (name compared
 * case-insensitively), the current item is handed to the packet.
 *
 * @param uri       namespace URI (not used in the visible lines)
 * @param localName local name (not used in the visible lines)
 * @param qName     qualified element name
 */
218 public void endElement(final String uri, final String localName, final String qName) {
219 if (qName.equalsIgnoreCase("chat")) {
// NOTE(review): a line between the check above and this call is missing from the
// excerpt -- presumably a guard that skips rejected items; confirm.
221 packet.addChat(item);
/**
 * SAX callback for the end of the document; logs a debug message marking the
 * end of the conversion.
 */
231 public void endDocument() {
232 logger.debug("Converting finished.");