3 import java.util.regex.Pattern;
5 import org.xml.sax.helpers.DefaultHandler;
6 import org.xml.sax.Attributes;
14 * 説明: ニコニコ動画の動画をコメントつきで保存
18 * 著作権: Copyright (c) 2007 PSI
// SAX handler (extends DefaultHandler) that reads "chat" elements and hands each item to Packet.addChat.
28 public class NicoXMLReader extends DefaultHandler {
// Receiver of chat items; endElement calls Packet.addChat(item) on it.
29 private final Packet Packet;
// NOTE(review): presumably marks the current chat as rejected by an NG filter; its reads/writes are not visible in this view - confirm.
33 private boolean item_kicked;
// Compiled NG-word filter; matched against the "mail" attribute and against the comment text.
35 private final Pattern NG_Word;
// Compiled NG-ID filter; matched against the "user_id" attribute.
37 private final Pattern NG_ID;
// Creates the reader and compiles both filter specifications with makePattern.
// packet:  NOTE(review): presumably stored in the Packet field on a line not visible here - confirm.
// ng_id:   filter specification for user ids, compiled into NG_ID.
// ng_word: filter specification for words, compiled into NG_Word.
39 public NicoXMLReader(Packet packet, String ng_id, String ng_word) {
41 NG_Word = makePattern(ng_word);
42 NG_ID = makePattern(ng_id);
// Compiles a space-separated filter specification into a single Pattern.
// Element syntax, as handled below:
//   /regex/    used verbatim as a regular expression group
//   "literal"  quoted with Pattern.quote, so it must match literally
//   word       wrapped so that any text containing the word matches
// Elements that start with / or " may contain spaces; they are re-joined from the split tokens.
// Side effect: prints every element and the final regex to System.out.
45 private static Pattern makePattern(String word) {
// Null or empty specification. NOTE(review): the branch body is not visible; presumably returns null, since match() checks pat == null - confirm.
46 if (word == null || word.length() <= 0) {
49 final String tmp[] = word.split(" ");
// tmp2 collects the re-joined elements; it can never need more slots than there are tokens.
50 String tmp2[] = new String[tmp.length];
// index counts produced elements, tmp_index walks the raw tokens (their declarations are not visible here).
53 for (index = 0; index < tmp.length && tmp_index < tmp.length; index++) {
// Regex element: append following tokens, restoring the single spaces removed by split.
// NOTE(review): in the visible lines the opening token itself is not tested for a closing "/",
// so a one-token "/x/" followed by other tokens appears to absorb them - confirm.
54 if (tmp[tmp_index].startsWith("/")) {
55 final StringBuilder str = new StringBuilder(tmp[tmp_index]);
56 for (tmp_index++; tmp_index < tmp.length; tmp_index++) {
57 str.append(" ").append(tmp[tmp_index]);
// Closing delimiter found. NOTE(review): the branch body is not visible; presumably ends the inner loop.
58 if (tmp[tmp_index].endsWith("/")) {
63 tmp2[index] = str.toString();
// Quoted literal element: same re-joining, with a double quote as the delimiter.
64 } else if (tmp[tmp_index].startsWith("\"")) {
65 final StringBuilder str = new StringBuilder(tmp[tmp_index]);
66 for (tmp_index++; tmp_index < tmp.length; tmp_index++) {
67 str.append(" ").append(tmp[tmp_index]);
68 if (tmp[tmp_index].endsWith("\"")) {
73 tmp2[index] = str.toString();
// Plain token: copied as-is.
75 tmp2[index] = tmp[tmp_index];
// Trim the element array to the number of elements actually produced.
79 final String elt[] = new String[index];
80 System.arraycopy(tmp2, 0, elt, 0, index);
81 final StringBuilder reg = new StringBuilder();
82 for (int i = 0; i < elt.length; i++) {
83 final String e = elt[i];
84 System.out.println(e);
// NOTE(review): how consecutive groups are joined (original lines 85-87) is not visible here.
// Element delimited by slashes: strip them and use the inside as a raw regex.
// NOTE(review): an element that is exactly "/" satisfies this test and substring(1, 0) would throw
// StringIndexOutOfBoundsException; the same applies to a lone double quote below - confirm.
88 if (e.indexOf('/') == 0 && e.lastIndexOf('/') == e.length() - 1) {
89 reg.append("(").append(e.substring(1, e.length() - 1)).append(")");
// Element delimited by double quotes: strip them and quote the inside.
90 } else if (e.indexOf('\"') == 0
91 && e.lastIndexOf('\"') == e.length() - 1) {
92 reg.append("(").append(Pattern.quote(e.substring(1, e.length() - 1)))
// Plain word: the surrounding .* lets the group match any string that contains the word.
95 reg.append("(.*(").append(Pattern.quote(e)).append(")+.*)");
98 System.out.println("reg:" + reg);
99 return Pattern.compile(reg.toString());
// Tests a word against a compiled filter pattern.
// The visible return uses Matcher.matches(), so the pattern must match the entire word, not just a part of it.
102 private static boolean match(Pattern pat, String word) {
// Guard for a null/empty word or a missing pattern. NOTE(review): the branch body is not visible; presumably returns false - confirm.
103 if (word == null || word.length() <= 0 || pat == null) {
106 return pat.matcher(word).matches();
// Document-start callback: prints a start message to System.out.
113 public void startDocument() {
114 System.out.println("Start converting to interval file.");
// Element-start callback. In the visible code only elements whose qName equals "chat"
// (compared case-insensitively) are processed; their attributes are copied into item.
// NOTE(review): the creation of item and the bodies of the filter branches below are not visible in this view.
129 public void startElement(final String uri, final String localName, final String qName,
130 final Attributes attributes) {
131 if (qName.equalsIgnoreCase("chat")) {
132 // System.out.println("----------");
// Chats carrying deleted="1" get special handling. NOTE(review): presumably skipped - confirm.
136 final String deleted = attributes.getValue("deleted");
137 if(deleted != null && deleted.equalsIgnoreCase("1")){
141 item.setDate(attributes.getValue("date"));
// The word filter is applied to the "mail" attribute as well as to the comment text (see characters).
142 final String mail = attributes.getValue("mail");
143 if (match(NG_Word, mail)) {
148 item.setNo(attributes.getValue("no"));
// The id filter is applied to the "user_id" attribute before it is stored.
149 final String user_id = attributes.getValue("user_id");
150 if (match(NG_ID, user_id)) {
154 item.setUserID(user_id);
155 item.setVpos(attributes.getValue("vpos"));
// Character-data callback: copies the reported range of ch, checks it against the word filter
// and stores it as the comment of the current item.
// NOTE(review): SAX parsers may deliver one element's text in several characters() calls; whether
// setComment replaces or appends cannot be seen here, so long comments might be truncated - confirm.
170 public void characters(final char[] ch, final int offset, final int length) {
// Work on a private copy so the parser's buffer is not modified.
171 final char input[] = (new String(ch, offset, length)).toCharArray();
172 for (int i = 0; i < input.length; i++) {
// Characters rejected by Character.isDefined get special handling. NOTE(review): the branch body is not visible; presumably replaced - confirm.
173 if (!Character.isDefined(input[i])) {
178 final String com = new String(input);
// Word filter on the comment text. NOTE(review): the branch body is not visible here.
179 if (match(NG_Word, com)) {
183 item.setComment(com);
// Element-end callback: when a "chat" element (case-insensitive qName) closes, the item is passed to Packet.addChat.
// NOTE(review): a condition on the line between these two (not visible) may guard the call - confirm.
197 public void endElement(final String uri, final String localName, final String qName) {
198 if (qName.equalsIgnoreCase("chat")) {
200 Packet.addChat(item);
// Document-end callback: prints a completion message to System.out.
210 public void endDocument() {
211 // System.out.println("----------");
212 System.out.println("Converting finished.");