package saccubus.conv;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;
/**
 * SAX handler that reads {@code chat} elements from a comment XML document and
 * adds the ones that pass filtering to a {@link Packet}.
 *
 * <p>A chat is dropped when its comment type is not among the requested types,
 * when its {@code deleted} attribute is {@code "1"}, when its {@code mail}
 * attribute or comment text matches the NG word pattern, or when its
 * {@code user_id} attribute matches the NG ID pattern.
 *
 * <p>Title: Saccubus
 *
 * <p>Description: saves Nico Nico Douga videos together with their comments
 *
 * <p>Copyright: Copyright (c) 2007 PSI
 *
 * @author (not specified)
 * @version 1.0
 */
public class NicoXMLReader extends DefaultHandler {

    private static final Logger logger = LoggerFactory.getLogger(NicoXMLReader.class);

    /** Comment types that should be passed on to the packet. */
    private final Set<CommentType> processTypes;
    /** Destination for accepted chats. */
    private final Packet packet;
    /** Compiled NG word filter, or {@code null} when no filter is configured. */
    private final Pattern ngWord;
    /** Compiled NG ID filter, or {@code null} when no filter is configured. */
    private final Pattern ngId;
    /** Thread id of the first chat seen; chats of any other thread count as optional. */
    private String mainThreadId;
    /** Chat currently being read, or {@code null} outside a {@code chat} element. */
    private Chat item;
    /** Whether the chat currently being read has been rejected. */
    private boolean itemKicked;
    /** Text collected so far for the current chat; {@code null} until text is reported. */
    private StringBuilder commentText;

    /**
     * Creates a reader.
     *
     * @param types   comment types to process; an empty set means nothing is processed
     * @param packet  packet that receives the accepted chats
     * @param ng_id   space-separated NG ID entries, or {@code null}/empty for no filter
     * @param ng_word space-separated NG word entries, or {@code null}/empty for no filter
     * @throws java.util.regex.PatternSyntaxException if a {@code /regex/} entry is not a valid
     *         regular expression
     */
    public NicoXMLReader(Set<CommentType> types, Packet packet, String ng_id, String ng_word) {
        // EnumSet.copyOf rejects an empty collection that is not itself an EnumSet,
        // so the empty case is handled explicitly.
        this.processTypes = types.isEmpty()
                ? EnumSet.noneOf(CommentType.class)
                : EnumSet.copyOf(types);
        this.packet = packet;
        this.ngWord = makePattern(ng_word);
        this.ngId = makePattern(ng_id);
    }

    /**
     * Compiles a space-separated NG list into one alternation pattern.
     *
     * <p>Each entry takes one of three forms:
     * <ul>
     * <li>{@code /regex/} - used verbatim as a regular expression;</li>
     * <li>{@code "text"} - matches exactly {@code text};</li>
     * <li>anything else - matches any input that contains the entry.</li>
     * </ul>
     * The first two forms may contain spaces; the entry then runs up to the next
     * token that ends with the same delimiter, or to the end of the list if there
     * is none (in which case it is treated as a plain entry). Blank entries caused
     * by leading or repeated spaces are ignored.
     *
     * @param word the NG list, may be {@code null}
     * @return the compiled pattern, or {@code null} when the list has no entries
     */
    private static Pattern makePattern(String word) {
        if (word == null || word.length() == 0) {
            return null;
        }
        final String[] tokens = word.split(" ");
        final StringBuilder reg = new StringBuilder();
        int pos = 0;
        while (pos < tokens.length) {
            final String head = tokens[pos];
            if (head.length() == 0) {
                // A blank entry would quote to an empty literal that matches every input.
                pos++;
                continue;
            }
            final int last;
            if (head.startsWith("/")) {
                last = findGroupEnd(tokens, pos, "/");
            } else if (head.startsWith("\"")) {
                last = findGroupEnd(tokens, pos, "\"");
            } else {
                last = pos;
            }
            final String entry = joinTokens(tokens, pos, last);
            pos = last + 1;
            logger.debug(entry);
            if (reg.length() > 0) {
                reg.append("|");
            }
            appendAlternative(reg, entry);
        }
        if (reg.length() == 0) {
            return null;
        }
        logger.debug("reg:{}", reg);
        return Pattern.compile(reg.toString());
    }

    /**
     * Finds the last token of a delimited entry that opens at {@code start}.
     *
     * @return the index of the closing token, or the last index when the entry is never closed
     */
    private static int findGroupEnd(String[] tokens, int start, String delimiter) {
        final String opening = tokens[start];
        // A token such as "/abc/" opens and closes the entry by itself; a bare
        // delimiter only opens it.
        if (opening.length() > 1 && opening.endsWith(delimiter)) {
            return start;
        }
        for (int i = start + 1; i < tokens.length; i++) {
            if (tokens[i].endsWith(delimiter)) {
                return i;
            }
        }
        return tokens.length - 1;
    }

    /** Joins {@code tokens[from..to]} (inclusive) with single spaces, restoring the split text. */
    private static String joinTokens(String[] tokens, int from, int to) {
        final StringBuilder joined = new StringBuilder(tokens[from]);
        for (int i = from + 1; i <= to; i++) {
            joined.append(" ").append(tokens[i]);
        }
        return joined.toString();
    }

    /** Appends the regular expression for a single NG entry to {@code reg}. */
    private static void appendAlternative(StringBuilder reg, String entry) {
        if (isDelimited(entry, '/')) {
            reg.append("(").append(entry.substring(1, entry.length() - 1)).append(")");
        } else if (isDelimited(entry, '"')) {
            reg.append("(").append(Pattern.quote(entry.substring(1, entry.length() - 1))).append(")");
        } else {
            reg.append("(.*(").append(Pattern.quote(entry)).append(")+.*)");
        }
    }

    /** Returns whether {@code entry} has at least two characters and starts and ends with {@code delimiter}. */
    private static boolean isDelimited(String entry, char delimiter) {
        return entry.length() >= 2
                && entry.charAt(0) == delimiter
                && entry.charAt(entry.length() - 1) == delimiter;
    }

    /**
     * Tests a value against an NG pattern.
     *
     * @return {@code true} only when both arguments are present, {@code word} is not empty and
     *         the pattern matches the whole of {@code word}
     */
    private static boolean match(Pattern pat, String word) {
        if (pat == null || word == null || word.length() == 0) {
            return false;
        }
        return pat.matcher(word).matches();
    }

    /**
     * Logs the start of the document.
     */
    @Override
    public void startDocument() {
        logger.debug("Start converting to interval file.");
    }

    /**
     * Starts a new chat when a {@code chat} element opens, and rejects it right
     * away if its attributes fail the filters. Other elements are ignored.
     *
     * @param uri        namespace URI (unused)
     * @param localName  local name (unused)
     * @param qName      qualified element name
     * @param attributes attributes of the element
     */
    @Override
    public void startElement(final String uri, final String localName, final String qName,
            final Attributes attributes) {
        if (!qName.equalsIgnoreCase("chat")) {
            return;
        }
        item = new Chat();
        itemKicked = false;
        commentText = null;

        // Decide whether this is a normal comment or an owner (uploader) comment.
        final boolean isOwner = "1".equals(attributes.getValue("fork"));
        final String thread = attributes.getValue("thread");
        // The first thread encountered is treated as the main (non-optional) thread.
        if (mainThreadId == null) {
            mainThreadId = thread;
        }
        final boolean isOptional = mainThreadId != null && !mainThreadId.equals(thread);
        if (!processTypes.contains(CommentType.valueOf(isOwner, isOptional))) {
            itemKicked = true;
            return;
        }

        // Comments flagged as deleted (My Memory deletion targets) are skipped.
        if ("1".equals(attributes.getValue("deleted"))) {
            itemKicked = true;
            return;
        }

        item.setDate(attributes.getValue("date"));

        final String mail = attributes.getValue("mail");
        if (match(ngWord, mail)) {
            itemKicked = true;
            return;
        }
        item.setMail(mail);
        item.setNo(attributes.getValue("no"));

        final String userId = attributes.getValue("user_id");
        if (match(ngId, userId)) {
            itemKicked = true;
            return;
        }
        item.setUserID(userId);
        item.setVpos(attributes.getValue("vpos"));
    }

    /**
     * Collects character data for the current chat.
     *
     * <p>A parser may report one text node through several calls, so the text is
     * only buffered here; the NG word check and the assignment to the chat happen
     * in {@link #endElement} once the text is complete. Undefined characters are
     * replaced with {@code '?'}.
     *
     * @param ch     character buffer
     * @param offset start of the reported range
     * @param length number of characters reported
     */
    @Override
    public void characters(final char[] ch, final int offset, final int length) {
        if (item == null || itemKicked) {
            return;
        }
        if (commentText == null) {
            commentText = new StringBuilder(length);
        }
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            final char c = ch[i];
            commentText.append(Character.isDefined(c) ? c : '?');
        }
    }

    /**
     * Finishes the current chat when a {@code chat} element closes: applies the NG
     * word filter to the collected text and adds the chat to the packet unless it
     * was rejected.
     *
     * @param uri       namespace URI (unused)
     * @param localName local name (unused)
     * @param qName     qualified element name
     */
    @Override
    public void endElement(final String uri, final String localName, final String qName) {
        if (!qName.equalsIgnoreCase("chat")) {
            return;
        }
        if (!itemKicked && commentText != null) {
            final String comment = commentText.toString();
            if (match(ngWord, comment)) {
                itemKicked = true;
            } else {
                item.setComment(comment);
            }
        }
        if (!itemKicked) {
            packet.addChat(item);
        }
        item = null;
        commentText = null;
    }

    /**
     * Logs the end of the document.
     */
    @Override
    public void endDocument() {
        logger.debug("Converting finished.");
    }
}