/*
- * This file is part of NixNote
+ * This file is part of NixNote/NeighborNote
* Copyright 2009 Randy Baumgarte
+ * Copyright 2013 Yuki Takahashi
*
* This file may be licensed under the terms of the
* GNU General Public License Version 2 (the ``GPL'').
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.LockSupport;
-import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.SAXException;
import com.evernote.edam.type.Data;
-import com.evernote.edam.type.Note;
import com.evernote.edam.type.Resource;
import com.trolltech.qt.core.QByteArray;
import com.trolltech.qt.core.QIODevice.OpenModeFlag;
public final int REINDEXNOTE=3;
public boolean keepRunning;
private final QDomDocument doc;
- private static String regex = Global.getWordRegex();
- public String specialIndexCharacters = "";
- public boolean indexNoteBody = true;
- public boolean indexNoteTitle = true;
+// private static String regex = Global.getWordRegex();
+// public String specialIndexCharacters = "";
+// public boolean indexNoteBody = true;
+// public boolean indexNoteTitle = true;
public boolean indexImageRecognition = true;
private final DatabaseConnection conn;
private volatile LinkedBlockingQueue<String> workQueue;
}
// Reindex a note
- public void indexNoteContent() {
- foundWords.clear();
-
- logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
-
- logger.log(logger.EXTREME, "Getting note content");
- Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
- String data;
- if (indexNoteBody) {
- data = n.getContent();
- data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
-
- logger.log(logger.EXTREME, "Removing any encrypted data");
- data = removeEnCrypt(data.toString());
- logger.log(logger.EXTREME, "Removing xml markups");
- } else
- data = "";
- String text;
- if (indexNoteTitle)
- text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
- else
- text = removeTags(StringEscapeUtils.unescapeHtml4(data));
-
- logger.log(logger.EXTREME, "Splitting words");
- String[] result = text.toString().split(regex);
- conn.commitTransaction();
- conn.beginTransaction();
- logger.log(logger.EXTREME, "Deleting existing words for note from index");
- conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
-
- logger.log(logger.EXTREME, "Number of words found: " +result.length);
- for (int j=0; j<result.length && keepRunning; j++) {
- if (interrupt) {
- processInterrupt();
- }
- if (!result[j].trim().equals("")) {
- logger.log(logger.EXTREME, "Result word: " +result[j].trim());
- addToIndex(guid, result[j], "CONTENT");
- }
- }
-
- // Add tags
- for (int j=0; j<n.getTagNamesSize(); j++) {
- if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
- addToIndex(guid, n.getTagNames().get(j), "CONTENT");
- }
-
- // If we were interrupted, we will reindex this note next time
- if (Global.keepRunning) {
- logger.log(logger.EXTREME, "Resetting note guid needed");
- conn.getNoteTable().setIndexNeeded(guid, false);
- }
- conn.commitTransaction();
- uncommittedCount = 0;
- logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
- }
+// public void indexNoteContent() {
+// foundWords.clear();
+//
+// logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
+//
+// logger.log(logger.EXTREME, "Getting note content");
+// Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
+// String data;
+// if (indexNoteBody) {
+// data = n.getContent();
+// data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
+//
+// logger.log(logger.EXTREME, "Removing any encrypted data");
+// data = removeEnCrypt(data.toString());
+// logger.log(logger.EXTREME, "Removing xml markups");
+// } else
+// data = "";
+// String text;
+// if (indexNoteTitle)
+// text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
+// else
+// text = removeTags(StringEscapeUtils.unescapeHtml4(data));
+//
+// logger.log(logger.EXTREME, "Splitting words");
+// String[] result = text.toString().split(regex);
+// conn.commitTransaction();
+// conn.beginTransaction();
+// logger.log(logger.EXTREME, "Deleting existing words for note from index");
+// conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
+//
+// logger.log(logger.EXTREME, "Number of words found: " +result.length);
+// for (int j=0; j<result.length && keepRunning; j++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// if (!result[j].trim().equals("")) {
+// logger.log(logger.EXTREME, "Result word: " +result[j].trim());
+// addToIndex(guid, result[j], "CONTENT");
+// }
+// }
+//
+// // Add tags
+// for (int j=0; j<n.getTagNamesSize(); j++) {
+// if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
+// addToIndex(guid, n.getTagNames().get(j), "CONTENT");
+// }
+//
+// // If we were interrupted, we will reindex this note next time
+// if (Global.keepRunning) {
+// logger.log(logger.EXTREME, "Resetting note guid needed");
+// conn.getNoteTable().setIndexNeeded(guid, false);
+// }
+// conn.commitTransaction();
+// uncommittedCount = 0;
+// logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
+// }
private String removeTags(String text) {
RTFParser parser = new RTFParser();
ParseContext context = new ParseContext();
parser.parse(input, textHandler, metadata, context);
- String[] result = textHandler.toString().split(regex);
- for (int i=0; i<result.length && keepRunning; i++) {
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
- }
+// String[] result = textHandler.toString().split(regex);
+// for (int i=0; i<result.length && keepRunning; i++) {
+// addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+// }
+ updateResourceText(r.getGuid(), textHandler.toString());
input.close();
f.close();
OpenDocumentParser parser = new OpenDocumentParser();
ParseContext context = new ParseContext();
parser.parse(input, textHandler, metadata, context);
- String[] result = textHandler.toString().split(regex);
- for (int i=0; i<result.length && keepRunning; i++) {
- if (interrupt) {
- processInterrupt();
- }
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
- }
+// String[] result = textHandler.toString().split(regex);
+// for (int i=0; i<result.length && keepRunning; i++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+// }
+ updateResourceText(r.getGuid(), textHandler.toString());
input.close();
f.close();
OfficeParser parser = new OfficeParser();
ParseContext context = new ParseContext();
parser.parse(input, textHandler, metadata, context);
- String[] result = textHandler.toString().split(regex);
- for (int i=0; i<result.length && keepRunning; i++) {
- if (interrupt) {
- processInterrupt();
- }
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
- }
+// String[] result = textHandler.toString().split(regex);
+// for (int i=0; i<result.length && keepRunning; i++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+// }
+ updateResourceText(r.getGuid(), textHandler.toString());
input.close();
f.close();
PDFParser parser = new PDFParser();
ParseContext context = new ParseContext();
parser.parse(input, textHandler, metadata, context);
- String[] result = textHandler.toString().split(regex);
- for (int i=0; i<result.length && keepRunning; i++) {
- if (interrupt) {
- processInterrupt();
- }
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
- }
+// String[] result = textHandler.toString().split(regex);
+// for (int i=0; i<result.length && keepRunning; i++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+// }
+ updateResourceText(r.getGuid(), textHandler.toString());
input.close();
f.close();
OOXMLParser parser = new OOXMLParser();
ParseContext context = new ParseContext();
parser.parse(input, textHandler, metadata, context);
- String[] result = textHandler.toString().split(regex);
- for (int i=0; i<result.length && keepRunning; i++) {
- if (interrupt) {
- processInterrupt();
- }
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
- }
+// String[] result = textHandler.toString().split(regex);
+// for (int i=0; i<result.length && keepRunning; i++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+// }
+ updateResourceText(r.getGuid(), textHandler.toString());
input.close();
f.close();
}
- private void addToIndex(String guid, String word, String type) {
- if (foundWords.contains(word))
- return;
- StringBuffer buffer = new StringBuffer(word.toLowerCase());
- for (int i=buffer.length()-1; i>=0; i--) {
- if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
- buffer.deleteCharAt(i);
- else
- break;
- }
- buffer = buffer.reverse();
- for (int i=buffer.length()-1; i>=0; i--) {
- if (!Character.isLetterOrDigit(buffer.charAt(i)))
- buffer.deleteCharAt(i);
- else
- break;
- }
- buffer = buffer.reverse();
- if (buffer.length() > 0) {
- // We have a good word, now let's trim off junk at the beginning or end
- if (!foundWords.contains(buffer.toString())) {
- foundWords.add(buffer.toString());
- foundWords.add(word);
- conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
- uncommittedCount++;
- if (uncommittedCount > 100) {
- conn.commitTransaction();
- uncommittedCount=0;
- }
- }
- }
- return;
+// private void addToIndex(String guid, String word, String type) {
+// if (foundWords.contains(word))
+// return;
+// StringBuffer buffer = new StringBuffer(word.toLowerCase());
+// for (int i=buffer.length()-1; i>=0; i--) {
+// if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
+// buffer.deleteCharAt(i);
+// else
+// break;
+// }
+// buffer = buffer.reverse();
+// for (int i=buffer.length()-1; i>=0; i--) {
+// if (!Character.isLetterOrDigit(buffer.charAt(i)))
+// buffer.deleteCharAt(i);
+// else
+// break;
+// }
+// buffer = buffer.reverse();
+// if (buffer.length() > 0) {
+// // We have a good word, now let's trim off junk at the beginning or end
+// if (!foundWords.contains(buffer.toString())) {
+// foundWords.add(buffer.toString());
+// foundWords.add(word);
+// conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
+// uncommittedCount++;
+// if (uncommittedCount > 100) {
+// conn.commitTransaction();
+// uncommittedCount=0;
+// }
+// }
+// }
+// return;
+// }
+
+ // Update the resource text in the note resource table for the given guid
+ private void updateResourceText(String guid, String text) {
+ conn.getNoteTable().noteResourceTable.updateResourceText(guid, text);
}
private void scanUnindexed() {
- List<String> notes = conn.getNoteTable().getUnindexed();
+// List<String> notes = conn.getNoteTable().getUnindexed();
guid = null;
boolean started = false;
- if (notes.size() > 0) {
- signal.indexStarted.emit();
- started = true;
- }
- for (int i=0; i<notes.size() && keepRunning; i++) {
- if (interrupt) {
- processInterrupt();
- }
- guid = notes.get(i);
- if (guid != null && keepRunning) {
- indexNoteContent();
- }
- }
+// if (notes.size() > 0) {
+// signal.indexStarted.emit();
+// started = true;
+// }
+// for (int i=0; i<notes.size() && keepRunning; i++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// guid = notes.get(i);
+// if (guid != null && keepRunning) {
+// indexNoteContent();
+// }
+// }
List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();
if (unindexedResources.size() > 0 && !started) {