/*\r
- * This file is part of NeverNote \r
+ * This file is part of NixNote \r
* Copyright 2009 Randy Baumgarte\r
* \r
* This file may be licensed under the terms of the\r
\r
package cx.fbn.nevernote.threads;\r
\r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.io.IOException;\r
+import java.io.InputStream;\r
+import java.util.List;\r
+import java.util.TreeSet;\r
import java.util.concurrent.LinkedBlockingQueue;\r
+import java.util.concurrent.locks.LockSupport;\r
\r
-import org.apache.commons.lang.StringEscapeUtils;\r
-import org.w3c.tidy.Tidy;\r
+import org.apache.commons.lang3.StringEscapeUtils;\r
+import org.apache.tika.exception.TikaException;\r
+import org.apache.tika.metadata.Metadata;\r
+import org.apache.tika.parser.ParseContext;\r
+import org.apache.tika.parser.microsoft.OfficeParser;\r
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
+import org.apache.tika.parser.odf.OpenDocumentParser;\r
+import org.apache.tika.parser.pdf.PDFParser;\r
+import org.apache.tika.parser.rtf.RTFParser;\r
+import org.apache.tika.sax.BodyContentHandler;\r
+import org.xml.sax.ContentHandler;\r
+import org.xml.sax.SAXException;\r
\r
+import com.evernote.edam.type.Data;\r
import com.evernote.edam.type.Note;\r
import com.evernote.edam.type.Resource;\r
import com.trolltech.qt.core.QByteArray;\r
+import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
import com.trolltech.qt.core.QObject;\r
+import com.trolltech.qt.core.QTemporaryFile;\r
import com.trolltech.qt.xml.QDomDocument;\r
import com.trolltech.qt.xml.QDomElement;\r
import com.trolltech.qt.xml.QDomNodeList;\r
\r
import cx.fbn.nevernote.Global;\r
+import cx.fbn.nevernote.signals.IndexSignal;\r
import cx.fbn.nevernote.signals.NoteResourceSignal;\r
import cx.fbn.nevernote.signals.NoteSignal;\r
import cx.fbn.nevernote.sql.DatabaseConnection;\r
+ // Qt signal holders; run() instantiates both when the thread starts.\r
public volatile NoteSignal noteSignal;\r
public volatile NoteResourceSignal resourceSignal;\r
+ // Job selected by the most recent work-queue request; run() dispatches on it.\r
private int indexType;\r
- public final int CONTENT=1; \r
- public final int RESOURCE=2;\r
+ // indexType values. In run(): SCAN -> scanUnindexed(), REINDEXALL -> reindexAll(),\r
+ // REINDEXNOTE -> reindexNote().\r
+ public final int SCAN=1; \r
+ public final int REINDEXALL=2;\r
+ public final int REINDEXNOTE=3;\r
+ // Main-loop flag: run() leaves its loop once this is false (a "STOP" request clears it).\r
public boolean keepRunning;\r
+ // Reused DOM document; resource recognition data is loaded into it with setContent().\r
private final QDomDocument doc;\r
+ // Pattern passed to String.split() to break extracted text into words.\r
private static String regex = Global.getWordRegex();\r
+ // Non-alphanumeric characters that addToIndex() leaves in place at the END of a word.\r
+ public String specialIndexCharacters = "";\r
+ // Read by indexNoteContent(): whether the note body / the note title is indexed.\r
+ public boolean indexNoteBody = true;\r
+ public boolean indexNoteTitle = true;\r
+ // When false, a resource's recognition data is ignored (a blank document is parsed instead).\r
+ public boolean indexImageRecognition = true;\r
+ // Database connection used for every index read/write; shut down when run() exits.\r
private final DatabaseConnection conn;\r
+ // Pending requests; run() blocks on take().\r
private volatile LinkedBlockingQueue<String> workQueue;\r
+ // NOTE(review): not referenced in the visible part of this file - presumably a cap on\r
+ // workQueue applied in addWork(); confirm.\r
private static int MAX_QUEUED_WAITING = 1000;\r
+ // Set to ask the indexer to yield: the indexing loops call processInterrupt(), which\r
+ // commits, pauses and then clears this flag.\r
+ public boolean interrupt;\r
+ // True while run() is waiting for the next request, false while one is being processed.\r
+ public boolean idle;\r
+ // When true, indexResourceContent() is called to index the attachment's own content.\r
+ public boolean indexAttachmentsLocally = true;\r
+ // Emits indexStarted / indexFinished from scanUnindexed(); created in run().\r
+ public volatile IndexSignal signal;\r
+ // Words already handled for the item currently being indexed; lets addToIndex() skip repeats.\r
+ private final TreeSet<String> foundWords;\r
+ // Index inserts since the last commit; a commit is forced once this exceeds 100.\r
+ int uncommittedCount = 0;\r
\r
\r
-\r
- \r
- public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
+ public IndexRunner(String logname, String u, String i, String r, String uid, String pswd, String cpswd) {\r
+ foundWords = new TreeSet<String>();\r
logger = new ApplicationLogger(logname);\r
- conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
- noteSignal = new NoteSignal();\r
- resourceSignal = new NoteResourceSignal();\r
- indexType = CONTENT;\r
+ conn = new DatabaseConnection(logger, u, i, r, uid, pswd, cpswd, 500);\r
+ indexType = SCAN;\r
guid = null;\r
keepRunning = true;\r
doc = new QDomDocument();\r
+ /**\r
+  * Thread body of the indexer.\r
+  *\r
+  * Lowers the thread priority, creates the signal objects, then loops while\r
+  * keepRunning is set: commit any open transaction, block on the work queue,\r
+  * decode the request by its prefix ("SCAN", "REINDEXALL", "REINDEXNOTE <guid>",\r
+  * "STOP") and run the matching job. The database is shut down once the loop ends.\r
+  */\r
@Override\r
public void run() {\r
thread().setPriority(Thread.MIN_PRIORITY);\r
+ noteSignal = new NoteSignal();\r
+ resourceSignal = new NoteResourceSignal();\r
+ signal = new IndexSignal();\r
logger.log(logger.EXTREME, "Starting index thread ");\r
while (keepRunning) {\r
+ // Flag the thread as idle until the next request has been taken from the queue.\r
+ idle=true;\r
try {\r
+ // Flush whatever the previous request left uncommitted before blocking on the queue.\r
+ conn.commitTransaction();\r
+ uncommittedCount = 0;\r
String work = workQueue.take();\r
- if (work.startsWith("CONTENT")) {\r
- work = work.replace("CONTENT ", "");\r
- guid = work;\r
- indexType = CONTENT;\r
+ idle=false;\r
+ if (work.startsWith("SCAN")) {\r
+ guid=null;\r
+ interrupt = false;\r
+ indexType = SCAN;\r
}\r
- if (work.startsWith("RESOURCE")) {\r
- work = work.replace("RESOURCE ", "");\r
+ if (work.startsWith("REINDEXALL")) {\r
+ guid = null;\r
+ indexType=REINDEXALL;\r
+ }\r
+ // "REINDEXNOTE <guid>": what remains after removing the prefix is used as the guid.\r
+ if (work.startsWith("REINDEXNOTE")) {\r
+ work = work.replace("REINDEXNOTE ", "");\r
guid = work;\r
- indexType = RESOURCE;\r
+ indexType = REINDEXNOTE;\r
}\r
if (work.startsWith("STOP")) {\r
keepRunning = false;\r
- guid = work;\r
- }\r
- if (guid == null || guid.trim().equals("")) {\r
- setIndexType(0);\r
- resourceSignal.resourceIndexed.emit("null or empty guid");\r
+ guid = null;\r
}\r
logger.log(logger.EXTREME, "Type:" +indexType);\r
- if (indexType == CONTENT && keepRunning) {\r
- logger.log(logger.MEDIUM, "Indexing note: "+guid);\r
- indexNoteContent();\r
+ if (indexType == SCAN && keepRunning) {\r
+ logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
+ scanUnindexed();\r
setIndexType(0);\r
}\r
- if (indexType == RESOURCE && keepRunning) {\r
- logger.log(logger.MEDIUM, "Indexing resource: "+guid);\r
- indexResource();\r
+ if (indexType == REINDEXALL && keepRunning) {\r
+ logger.log(logger.MEDIUM, "Marking all for reindex");\r
+ reindexAll();\r
setIndexType(0);\r
}\r
+ // NOTE(review): unlike the two branches above, this one does not call\r
+ // setIndexType(0) afterwards - confirm that is intended.\r
+ if (indexType == REINDEXNOTE && keepRunning) {\r
+ reindexNote();\r
+ }\r
} catch (InterruptedException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
+ // The interruption is only logged; the loop then re-checks keepRunning.\r
+ logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());\r
}\r
}\r
+ logger.log(logger.EXTREME, "Shutting down database");\r
conn.dbShutdown();\r
+ logger.log(logger.EXTREME, "Database shut down. Exiting thread");\r
}\r
\r
// Reindex a note\r
+ // Rebuilds the "CONTENT" word index of the note whose guid is in the guid field:\r
+ // gathers body and/or title text (per indexNoteBody / indexNoteTitle), strips markup,\r
+ // splits it with regex, expunges the note's old CONTENT words, then adds every word\r
+ // and every tag name through addToIndex(). Clears the note's index-needed flag at the end\r
+ // unless Global.keepRunning is false.\r
public void indexNoteContent() {\r
+ // Start with an empty duplicate filter for this note.\r
+ foundWords.clear();\r
+ \r
logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
\r
logger.log(logger.EXTREME, "Getting note content");\r
Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
- String data = n.getContent();\r
- \r
- logger.log(logger.EXTREME, "Removing any encrypted data");\r
- data = removeEnCrypt(data);\r
- logger.log(logger.EXTREME, "Removing xml markups");\r
- // These HTML characters need to be replaced by a space, or they'll cause words to jam together\r
-// data = data.toLowerCase().replace("<br>", " ").replace("<hr>", " ").replace("<p>", " ").replace("<href>", " ");\r
-// String text = StringEscapeUtils.unescapeHtml(data.replaceAll("\\<.*?\\>", ""));\r
- Tidy tidy = new Tidy();\r
- tidy.getStderr().close(); // the listener will capture messages\r
- tidy.setXmlTags(true);\r
- byte html[] = data.getBytes();\r
- ByteArrayInputStream is = new ByteArrayInputStream(html);\r
- ByteArrayOutputStream os = new ByteArrayOutputStream();\r
- tidy.parse(is, os);\r
- String text = StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", ""));\r
+ String data;\r
+ if (indexNoteBody) {\r
+ // NOTE(review): the value assigned here is overwritten by the very next statement.\r
+ data = n.getContent();\r
+ data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
+ \r
+ logger.log(logger.EXTREME, "Removing any encrypted data");\r
+ data = removeEnCrypt(data.toString());\r
+ logger.log(logger.EXTREME, "Removing xml markups");\r
+ } else\r
+ data = "";\r
+ String text;\r
+ // NOTE(review): entities are unescaped BEFORE removeTags() runs, so text that was\r
+ // escaped in the note (e.g. "&lt;b&gt;") is stripped as if it were markup - confirm intended.\r
+ if (indexNoteTitle)\r
+ text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());\r
+ else\r
+ text = removeTags(StringEscapeUtils.unescapeHtml4(data));\r
\r
logger.log(logger.EXTREME, "Splitting words");\r
String[] result = text.toString().split(regex);\r
+ // Close any open transaction and start a fresh one before touching the word index.\r
+ conn.commitTransaction();\r
+ conn.beginTransaction();\r
logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
\r
logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
for (int j=0; j<result.length && keepRunning; j++) {\r
- logger.log(logger.EXTREME, "Result word: " +result[j]);\r
- if (result[j].length() > 0) {\r
- // We have a good word, now let's trim off junk at the beginning or end\r
- StringBuffer buffer = new StringBuffer(result[j].toLowerCase());\r
- for (int x = buffer.length()-1; x>=0; x--) {\r
- if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
- buffer = buffer.deleteCharAt(x);\r
- else\r
- x=-1;\r
- }\r
- // Things have been trimmed off the end, so reverse the string & repeat.\r
- buffer = buffer.reverse();\r
- for (int x = buffer.length()-1; x>=0; x--) {\r
- if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
- buffer = buffer.deleteCharAt(x);\r
- else\r
- x=-1;\r
- }\r
- // Restore the string back to the proper order.\r
- buffer = buffer.reverse();\r
- \r
- logger.log(logger.EXTREME, "Processing " +buffer);\r
- if (buffer.length()>=Global.minimumWordCount) {\r
- logger.log(logger.EXTREME, "Adding " +buffer);\r
- conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), "CONTENT", 100);\r
- }\r
+ // Yield (commit + pause) if someone raised the interrupt flag.\r
+ if (interrupt) {\r
+ processInterrupt();\r
+ }\r
+ if (!result[j].trim().equals("")) {\r
+ logger.log(logger.EXTREME, "Result word: " +result[j].trim());\r
+ addToIndex(guid, result[j], "CONTENT");\r
}\r
}\r
+ \r
+ // Add tags\r
+ for (int j=0; j<n.getTagNamesSize(); j++) {\r
+ if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))\r
+ addToIndex(guid, n.getTagNames().get(j), "CONTENT");\r
+ }\r
+ \r
// If we were interrupted, we will reindex this note next time\r
+ // NOTE(review): this tests Global.keepRunning, whereas the loops above test this\r
+ // object's own keepRunning field - confirm which flag is meant.\r
if (Global.keepRunning) {\r
logger.log(logger.EXTREME, "Resetting note guid needed");\r
conn.getNoteTable().setIndexNeeded(guid, false);\r
- }\r
+ } \r
+ conn.commitTransaction();\r
+ uncommittedCount = 0;\r
logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
}\r
+ \r
+ \r
+ /**\r
+  * Strips markup from {@code text}, putting a single space where each tag was.\r
+  *\r
+  * The space matters: without it the words on either side of a tag run together\r
+  * ("<div>a</div><div>b</div>" used to come out as "ab") and are indexed as one word.\r
+  *\r
+  * The text is scanned right-to-left, exactly as before: a '>' opens a tag, the next\r
+  * '<' to its left closes it. A '<' with no matching '>' is replaced by a space; a '>'\r
+  * with no matching '<' removes everything to its left up to the start of the text.\r
+  *\r
+  * @param text text that may contain tags; must not be null\r
+  * @return the text with every tag replaced by one space\r
+  */\r
+ private String removeTags(String text) {\r
+     // The result can never be longer than the input (a tag shrinks to one space),\r
+     // so one array filled from the right replaces the repeated deleteCharAt() calls.\r
+     char[] kept = new char[text.length()];\r
+     int next = kept.length;\r
+     boolean inTag = false;\r
+     for (int i = text.length() - 1; i >= 0; i--) {\r
+         char c = text.charAt(i);\r
+         if (c == '>') {\r
+             inTag = true;\r
+         } else if (c == '<') {\r
+             inTag = false;\r
+             kept[--next] = ' ';\r
+         } else if (!inTag) {\r
+             kept[--next] = c;\r
+         }\r
+     }\r
+     return new String(kept, next, kept.length - next);\r
+ }\r
\r
\r
public synchronized boolean addWork(String request) {\r
\r
if (guid == null)\r
return;\r
- \r
+ foundWords.clear();\r
Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
- if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
+ if (!indexImageRecognition || \r
+ r == null || r.getRecognition() == null || \r
+ r.getRecognition().getBody() == null || \r
+ r.getRecognition().getBody().length == 0) \r
resourceBinary = new QByteArray(" ");\r
else\r
resourceBinary = new QByteArray(r.getRecognition().getBody());\r
\r
- conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");\r
+ conn.commitTransaction();\r
+ conn.beginTransaction();\r
+ conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
+ // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
+ conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE"); \r
+ conn.commitTransaction();\r
+ uncommittedCount = 0;\r
+ conn.beginTransaction();\r
\r
doc.setContent(resourceBinary);\r
QDomElement docElem = doc.documentElement();\r
// look for text tags\r
QDomNodeList anchors = docElem.elementsByTagName("t");\r
for (int i=0; i<anchors.length() && keepRunning; i++) {\r
+ if (interrupt) {\r
+ if (interrupt) {\r
+ processInterrupt();\r
+ }\r
+ }\r
QDomElement enmedia = anchors.at(i).toElement();\r
String weight = new String(enmedia.attribute("w"));\r
String text = new String(enmedia.text()).toLowerCase();\r
if (!text.equals("")) {\r
- conn.getWordsTable().addWordToNoteIndex(guid, text, "RESOURCE", new Integer(weight));\r
+ conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
+ uncommittedCount++;\r
+ if (uncommittedCount > 100) {\r
+ conn.commitTransaction();\r
+ uncommittedCount=0;\r
+ }\r
}\r
}\r
+ \r
+ if (Global.keepRunning && indexAttachmentsLocally) {\r
+ conn.commitTransaction();\r
+ uncommittedCount = 0;\r
+ conn.beginTransaction();\r
+ indexResourceContent(guid);\r
+ }\r
+ \r
if (Global.keepRunning)\r
conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
+ conn.commitTransaction();\r
+ uncommittedCount = 0;\r
+ }\r
+ \r
+ /**\r
+  * Loads the resource with the given guid and hands it to the indexer that matches\r
+  * its MIME type (compared case-insensitively). Resources that cannot be loaded, have\r
+  * no MIME type, or have a MIME type not listed here are left alone.\r
+  *\r
+  * @param guid guid of the resource to index\r
+  */\r
+ private void indexResourceContent(String guid) {\r
+     Resource resource = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
+     String mime = (resource == null) ? null : resource.getMime();\r
+     if (mime == null) {\r
+         return;\r
+     }\r
+\r
+     if (mime.equalsIgnoreCase("application/pdf")) {\r
+         indexResourcePDF(resource);\r
+     } else if (mime.equalsIgnoreCase("application/docx")\r
+             || mime.equalsIgnoreCase("application/xlsx")\r
+             || mime.equalsIgnoreCase("application/pptx")) {\r
+         indexResourceOOXML(resource);\r
+     } else if (mime.equalsIgnoreCase("application/vsd")\r
+             || mime.equalsIgnoreCase("application/ppt")\r
+             || mime.equalsIgnoreCase("application/xls")\r
+             || mime.equalsIgnoreCase("application/msg")\r
+             || mime.equalsIgnoreCase("application/doc")) {\r
+         indexResourceOffice(resource);\r
+     } else if (mime.equalsIgnoreCase("application/rtf")) {\r
+         indexResourceRTF(resource);\r
+     } else if (mime.equalsIgnoreCase("application/odf")\r
+             || mime.equalsIgnoreCase("application/odt")\r
+             || mime.equalsIgnoreCase("application/odp")\r
+             || mime.equalsIgnoreCase("application/odg")\r
+             || mime.equalsIgnoreCase("application/odb")\r
+             || mime.equalsIgnoreCase("application/ods")) {\r
+         indexResourceODF(resource);\r
+     }\r
+ }\r
+\r
+\r
+ /** Extracts the text of an RTF attachment and adds its words to the RESOURCE index. */\r
+ private void indexResourceRTF(Resource r) {\r
+     indexAttachment(r, AttachmentKind.RTF);\r
}\r
+\r
+ /** Extracts the text of an OpenDocument attachment and adds its words to the RESOURCE index. */\r
+ private void indexResourceODF(Resource r) {\r
+     indexAttachment(r, AttachmentKind.ODF);\r
+ }\r
+\r
+ /** Extracts the text of a legacy MS Office attachment and adds its words to the RESOURCE index. */\r
+ private void indexResourceOffice(Resource r) {\r
+     indexAttachment(r, AttachmentKind.OFFICE);\r
+ }\r
+\r
+ /** Extracts the text of a PDF attachment and adds its words to the RESOURCE index. */\r
+ private void indexResourcePDF(Resource r) {\r
+     indexAttachment(r, AttachmentKind.PDF);\r
+ }\r
+\r
+ /** Extracts the text of an Office Open XML attachment and adds its words to the RESOURCE index. */\r
+ private void indexResourceOOXML(Resource r) {\r
+     indexAttachment(r, AttachmentKind.OOXML);\r
+ }\r
+\r
+ /** Attachment families, one per Tika parser used by indexAttachment(). */\r
+ private enum AttachmentKind { RTF, ODF, OFFICE, PDF, OOXML }\r
+\r
+ /** Creates the Tika parser for the given attachment family. */\r
+ private org.apache.tika.parser.Parser newParser(AttachmentKind kind) {\r
+     switch (kind) {\r
+         case RTF:    return new RTFParser();\r
+         case ODF:    return new OpenDocumentParser();\r
+         case OFFICE: return new OfficeParser();\r
+         case PDF:    return new PDFParser();\r
+         case OOXML:  return new OOXMLParser();\r
+         default:     throw new IllegalArgumentException("Unsupported attachment kind: " + kind);\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Shared implementation of the five indexResourceXxx() methods, which previously each\r
+  * carried their own copy of this code.\r
+  *\r
+  * Writes the resource body to a temporary file, parses it with the parser for\r
+  * {@code kind}, splits the extracted text with regex and passes every piece to\r
+  * addToIndex() under the resource's note guid. Indexing is best effort: any failure is\r
+  * logged at LOW level and the attachment is skipped.\r
+  *\r
+  * Differences from the old copies: the input stream is closed in a finally block\r
+  * (it used to leak whenever parsing threw), a resource without data no longer causes a\r
+  * NullPointerException, and the interrupt check now also applies to RTF attachments.\r
+  *\r
+  * @param r    resource whose data is to be indexed\r
+  * @param kind attachment family, selects the parser\r
+  */\r
+ private void indexAttachment(Resource r, AttachmentKind kind) {\r
+     Data d = r.getData();\r
+     // Retained from the original code: re-read the data up to 20 times while it is empty.\r
+     // NOTE(review): presumably a workaround for data arriving late - confirm it is still needed.\r
+     for (int i = 0; i < 20 && d != null && d.getSize() == 0; i++) {\r
+         d = r.getData();\r
+     }\r
+     if (d == null || d.getSize() == 0) {\r
+         return;\r
+     }\r
+\r
+     QTemporaryFile f = writeResource(d);\r
+     InputStream input = null;\r
+     try {\r
+         if (!keepRunning) {\r
+             return;\r
+         }\r
+         input = new FileInputStream(new File(f.fileName()));\r
+         ContentHandler textHandler = new BodyContentHandler(-1);\r
+         // The parser is created inside the try block, as before, so that a failure\r
+         // while creating it is logged below rather than propagated to the caller.\r
+         newParser(kind).parse(input, textHandler, new Metadata(), new ParseContext());\r
+         String[] result = textHandler.toString().split(regex);\r
+         for (int i = 0; i < result.length && keepRunning; i++) {\r
+             if (interrupt) {\r
+                 processInterrupt();\r
+             }\r
+             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
+         }\r
+     } catch (java.lang.ClassCastException e) {\r
+         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
+     } catch (FileNotFoundException e) {\r
+         logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());\r
+     } catch (IOException e) {\r
+         logger.log(logger.LOW, "IO exception: " +e.getMessage());\r
+     } catch (SAXException e) {\r
+         logger.log(logger.LOW, "SAX exception: " +e.getMessage());\r
+     } catch (TikaException e) {\r
+         logger.log(logger.LOW, "Tika exception: " +e.getMessage());\r
+     } catch (Exception e) {\r
+         logger.log(logger.LOW, "Unknown exception: " +e.getMessage());\r
+     } catch (java.lang.NoSuchMethodError e) {\r
+         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
+     } catch (Error e) {\r
+         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
+     } finally {\r
+         if (input != null) {\r
+             try {\r
+                 input.close();\r
+             } catch (IOException e) {\r
+                 logger.log(logger.LOW, "IO exception: " +e.getMessage());\r
+             }\r
+         }\r
+         f.close();\r
+     }\r
+ }\r
+ \r
+\r
+ \r
+ /**\r
+  * Writes the body of {@code d} to a new Qt temporary file.\r
+  *\r
+  * @param d resource data whose body is written out\r
+  * @return the temporary file, already closed; callers read it back via fileName()\r
+  */\r
+ private QTemporaryFile writeResource(Data d) {\r
+     QTemporaryFile scratch = new QTemporaryFile();\r
+     scratch.open(OpenModeFlag.WriteOnly);\r
+     byte[] payload = d.getBody();\r
+     scratch.write(payload);\r
+     scratch.close();\r
+     return scratch;\r
+ }\r
+\r
+ \r
private String removeEnCrypt(String content) {\r
int index = content.indexOf("<en-crypt");\r
int endPos;\r
boolean tagFound = true;\r
while (tagFound && keepRunning) {\r
+ if (interrupt) {\r
+ processInterrupt();\r
+ }\r
endPos = content.indexOf("</en-crypt>", index)+11;\r
if (endPos > -1 && index > -1) {\r
content = content.substring(0,index)+content.substring(endPos);\r
}\r
\r
\r
+ /**\r
+  * Normalises one word and stores it in the word index unless it was already handled\r
+  * for the current item.\r
+  *\r
+  * The word is lower-cased; characters that are neither letters nor digits are trimmed\r
+  * from both ends. At the end of the word, characters listed in specialIndexCharacters\r
+  * are kept; at the start they are not. Both the raw and the normalised form are\r
+  * remembered in foundWords. A commit is issued after more than 100 uncommitted inserts.\r
+  *\r
+  * @param guid guid the word is indexed under\r
+  * @param word raw word as produced by splitting the text\r
+  * @param type index source passed through to the words table ("CONTENT" or "RESOURCE")\r
+  */\r
+ private void addToIndex(String guid, String word, String type) {\r
+     if (foundWords.contains(word)) {\r
+         return;\r
+     }\r
+\r
+     StringBuffer candidate = new StringBuffer(word.toLowerCase());\r
+\r
+     // Trailing junk: stop at the first letter, digit or special index character.\r
+     int keep = candidate.length();\r
+     while (keep > 0) {\r
+         char c = candidate.charAt(keep - 1);\r
+         if (Character.isLetterOrDigit(c) || specialIndexCharacters.indexOf(c) != -1) {\r
+             break;\r
+         }\r
+         keep--;\r
+     }\r
+     candidate.setLength(keep);\r
+\r
+     // Leading junk: same trim on the reversed buffer, letters and digits only.\r
+     candidate.reverse();\r
+     keep = candidate.length();\r
+     while (keep > 0 && !Character.isLetterOrDigit(candidate.charAt(keep - 1))) {\r
+         keep--;\r
+     }\r
+     candidate.setLength(keep);\r
+     candidate.reverse();\r
+\r
+     if (candidate.length() == 0) {\r
+         return;\r
+     }\r
+     String indexed = candidate.toString();\r
+     if (foundWords.contains(indexed)) {\r
+         return;\r
+     }\r
+\r
+     foundWords.add(indexed);\r
+     foundWords.add(word);\r
+     conn.getWordsTable().addWordToNoteIndex(guid, indexed, type, 100);\r
+     uncommittedCount++;\r
+     if (uncommittedCount > 100) {\r
+         conn.commitTransaction();\r
+         uncommittedCount = 0;\r
+     }\r
+ }\r
+ \r
+ /**\r
+  * Indexes every note and resource the database reports as unindexed, then removes\r
+  * index entries whose guid no longer exists in the note table.\r
+  *\r
+  * indexStarted is emitted once, as soon as there is any work; indexFinished is emitted\r
+  * at the end only if indexStarted was emitted and keepRunning is still set. All loops\r
+  * stop early when keepRunning is cleared.\r
+  */\r
+ private void scanUnindexed() {\r
+     List<String> notes = conn.getNoteTable().getUnindexed();\r
+     guid = null;\r
+     boolean started = notes.size() > 0;\r
+     if (started) {\r
+         signal.indexStarted.emit();\r
+     }\r
+     for (String noteGuid : notes) {\r
+         if (!keepRunning) {\r
+             break;\r
+         }\r
+         if (interrupt) {\r
+             processInterrupt();\r
+         }\r
+         guid = noteGuid;\r
+         if (guid != null && keepRunning) {\r
+             indexNoteContent();\r
+         }\r
+     }\r
+\r
+     List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
+     if (!started && unindexedResources.size() > 0) {\r
+         signal.indexStarted.emit();\r
+         started = true;\r
+     }\r
+     for (String resourceGuid : unindexedResources) {\r
+         if (!keepRunning) {\r
+             break;\r
+         }\r
+         if (interrupt) {\r
+             processInterrupt();\r
+         }\r
+         guid = resourceGuid;\r
+         if (keepRunning) {\r
+             indexResource();\r
+         }\r
+     }\r
+\r
+     // Cleanup stuff that was deleted at some point\r
+     List<String> guids = conn.getWordsTable().getGuidList();\r
+     logger.log(logger.LOW, "GUIDS in index: " +guids.size());\r
+     for (String indexedGuid : guids) {\r
+         if (!keepRunning) {\r
+             break;\r
+         }\r
+         if (conn.getNoteTable().exists(indexedGuid)) {\r
+             continue;\r
+         }\r
+         logger.log(logger.LOW, "Old GUID found: " +indexedGuid);\r
+         conn.getWordsTable().expunge(indexedGuid);\r
+     }\r
+\r
+     if (started && keepRunning) {\r
+         signal.indexFinished.emit();\r
+     }\r
+ }\r
\r
+ /**\r
+  * Sets the index-needed flag of the note whose guid is held in the guid field.\r
+  * Does nothing when no guid is set.\r
+  */\r
+ private void reindexNote() {\r
+     if (guid != null) {\r
+         conn.getNoteTable().setIndexNeeded(guid, true);\r
+     }\r
+ }\r
\r
+ /**\r
+  * Calls reindexAllNotes() on the note table, then reindexAll() on the note resource\r
+  * table. run() logs this step as "Marking all for reindex".\r
+  * NOTE(review): presumably this only flags the rows and the actual indexing happens\r
+  * on a later SCAN request - confirm against the table classes.\r
+  */\r
+ private void reindexAll() {\r
+ conn.getNoteTable().reindexAllNotes();\r
+ conn.getNoteTable().noteResourceTable.reindexAll(); \r
+ }\r
\r
+ /**\r
+  * Pauses the calling thread for roughly {@code len} seconds.\r
+  *\r
+  * Earlier versions ignored {@code len} and always waited one second; the parameter is\r
+  * now honoured. The only caller visible in this file passes 1, so its behaviour is\r
+  * unchanged.\r
+  *\r
+  * @param len number of seconds to wait; zero or a negative value returns immediately\r
+  */\r
+ private void waitSeconds(int len) {\r
+     if (len <= 0) {\r
+         return;\r
+     }\r
+     // The long literal keeps the multiplication out of int range.\r
+     // parkNanos() may return before the time is up; as before, an early wake-up is\r
+     // accepted and not retried.\r
+     LockSupport.parkNanos(len * 1000000000L);\r
+ }\r
+ \r
+ /**\r
+  * Services a pending interrupt request: commits the open transaction, pauses for one\r
+  * second, opens a new transaction and clears the interrupt flag.\r
+  */\r
+ private void processInterrupt() {\r
+     conn.commitTransaction();\r
+     uncommittedCount = 0;\r
+\r
+     waitSeconds(1);\r
+\r
+     conn.beginTransaction();\r
+     interrupt = false;\r
+ }\r
+ \r
}\r