-/*\r
- * This file is part of NeverNote \r
- * Copyright 2009 Randy Baumgarte\r
- * \r
- * This file may be licensed under the terms of of the\r
- * GNU General Public License Version 2 (the ``GPL'').\r
- *\r
- * Software distributed under the License is distributed\r
- * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
- * express or implied. See the GPL for the specific language\r
- * governing rights and limitations.\r
- *\r
- * You should have received a copy of the GPL along with this\r
- * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
- * or write to the Free Software Foundation, Inc.,\r
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
- *\r
-*/\r
-\r
-package cx.fbn.nevernote.threads;\r
-\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.FileNotFoundException;\r
-import java.io.IOException;\r
-import java.io.InputStream;\r
-import java.util.List;\r
-import java.util.concurrent.LinkedBlockingQueue;\r
-\r
-import org.apache.commons.lang.StringEscapeUtils;\r
-import org.apache.tika.exception.TikaException;\r
-import org.apache.tika.metadata.Metadata;\r
-import org.apache.tika.parser.ParseContext;\r
-import org.apache.tika.parser.microsoft.OfficeParser;\r
-import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
-import org.apache.tika.parser.odf.OpenDocumentParser;\r
-import org.apache.tika.parser.pdf.PDFParser;\r
-import org.apache.tika.parser.rtf.RTFParser;\r
-import org.apache.tika.sax.BodyContentHandler;\r
-import org.xml.sax.ContentHandler;\r
-import org.xml.sax.SAXException;\r
-\r
-import com.evernote.edam.type.Data;\r
-import com.evernote.edam.type.Note;\r
-import com.evernote.edam.type.Resource;\r
-import com.trolltech.qt.core.QByteArray;\r
-import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
-import com.trolltech.qt.core.QObject;\r
-import com.trolltech.qt.core.QTemporaryFile;\r
-import com.trolltech.qt.xml.QDomDocument;\r
-import com.trolltech.qt.xml.QDomElement;\r
-import com.trolltech.qt.xml.QDomNodeList;\r
-\r
-import cx.fbn.nevernote.Global;\r
-import cx.fbn.nevernote.signals.IndexSignal;\r
-import cx.fbn.nevernote.signals.NoteResourceSignal;\r
-import cx.fbn.nevernote.signals.NoteSignal;\r
-import cx.fbn.nevernote.sql.DatabaseConnection;\r
-import cx.fbn.nevernote.utilities.ApplicationLogger;\r
-\r
-public class IndexRunner extends QObject implements Runnable {\r
- \r
- private final ApplicationLogger logger;\r
- private String guid;\r
- private QByteArray resourceBinary;\r
- public volatile NoteSignal noteSignal;\r
- public volatile NoteResourceSignal resourceSignal;\r
- private int indexType;\r
- public final int SCAN=1; \r
- public final int REINDEXALL=2;\r
- public final int REINDEXNOTE=3;\r
- public boolean keepRunning;\r
- private final QDomDocument doc;\r
- private static String regex = Global.getWordRegex();\r
- private final DatabaseConnection conn;\r
- private volatile LinkedBlockingQueue<String> workQueue;\r
- private static int MAX_QUEUED_WAITING = 1000;\r
- public boolean interrupt;\r
- public boolean idle;\r
- public boolean indexAttachmentsLocally = true;\r
- public volatile IndexSignal signal;\r
-\r
- \r
- public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
- logger = new ApplicationLogger(logname);\r
- conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
- indexType = SCAN;\r
- guid = null;\r
- keepRunning = true;\r
- doc = new QDomDocument();\r
- workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING); \r
- }\r
- \r
- public void setIndexType(int t) {\r
- indexType = t;\r
- }\r
- \r
- \r
- @Override\r
- public void run() {\r
- thread().setPriority(Thread.MIN_PRIORITY);\r
- noteSignal = new NoteSignal();\r
- resourceSignal = new NoteResourceSignal();\r
- signal = new IndexSignal();\r
- logger.log(logger.EXTREME, "Starting index thread ");\r
- while (keepRunning) {\r
- idle=true;\r
- try {\r
- String work = workQueue.take();\r
- idle=false;\r
- if (work.startsWith("SCAN")) {\r
- guid=null;\r
- interrupt = false;\r
- indexType = SCAN;\r
- }\r
- if (work.startsWith("REINDEXALL")) {\r
- guid = null;\r
- indexType=REINDEXALL;\r
- }\r
- if (work.startsWith("REINDEXNOTE")) {\r
- work = work.replace("REINDEXNOTE ", "");\r
- guid = work;\r
- indexType = REINDEXNOTE;\r
- }\r
- if (work.startsWith("STOP")) {\r
- keepRunning = false;\r
- guid = null;\r
- }\r
- logger.log(logger.EXTREME, "Type:" +indexType);\r
- if (indexType == SCAN && keepRunning) {\r
- logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
- scanUnindexed();\r
- setIndexType(0);\r
- }\r
- if (indexType == REINDEXALL && keepRunning) {\r
- logger.log(logger.MEDIUM, "Marking all for reindex");\r
- reindexAll();\r
- setIndexType(0);\r
- }\r
- if (indexType == REINDEXNOTE && keepRunning) {\r
- reindexNote();\r
- }\r
- } catch (InterruptedException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- }\r
- }\r
- logger.log(logger.EXTREME, "Shutting down database");\r
- conn.dbShutdown();\r
- logger.log(logger.EXTREME, "Database shut down. Exiting thread");\r
- }\r
- \r
- // Reindex a note\r
- public void indexNoteContent() {\r
- \r
- logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
- \r
- logger.log(logger.EXTREME, "Getting note content");\r
- Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
- String data = n.getContent();\r
- data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
- System.out.println(data);\r
- \r
- logger.log(logger.EXTREME, "Removing any encrypted data");\r
- data = removeEnCrypt(data.toString());\r
- logger.log(logger.EXTREME, "Removing xml markups");\r
- String text = removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
- n.getTitle());\r
- \r
- logger.log(logger.EXTREME, "Splitting words");\r
- String[] result = text.toString().split(regex);\r
- logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
- conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
- \r
- logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
- for (int j=0; j<result.length && keepRunning; j++) {\r
- if (!result[j].trim().equals("")) {\r
- logger.log(logger.EXTREME, "Result word: " +result[j]);\r
- addToIndex(guid, result[j], "CONTENT");\r
- }\r
- }\r
- // If we were interrupted, we will reindex this note next time\r
- if (Global.keepRunning) {\r
- logger.log(logger.EXTREME, "Resetting note guid needed");\r
- conn.getNoteTable().setIndexNeeded(guid, false);\r
- }\r
- logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
- }\r
- \r
- \r
- private String removeTags(String text) {\r
- StringBuffer buffer = new StringBuffer(text);\r
- boolean inTag = false;\r
- for (int i=buffer.length()-1; i>=0; i--) {\r
- if (buffer.charAt(i) == '>')\r
- inTag = true;\r
- if (buffer.charAt(i) == '<')\r
- inTag = false;\r
- if (inTag || buffer.charAt(i) == '<')\r
- buffer.deleteCharAt(i);\r
- }\r
- \r
- return buffer.toString();\r
- }\r
-\r
- \r
- public synchronized boolean addWork(String request) {\r
- if (workQueue.size() == 0) {\r
- workQueue.offer(request);\r
- return true;\r
- }\r
- return false;\r
- }\r
- \r
- public synchronized int getWorkQueueSize() {\r
- return workQueue.size();\r
- }\r
- \r
- public void indexResource() {\r
- \r
- if (guid == null)\r
- return;\r
- \r
- Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
- if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
- resourceBinary = new QByteArray(" ");\r
- else\r
- resourceBinary = new QByteArray(r.getRecognition().getBody());\r
- \r
- conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
- // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
- conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE"); \r
- \r
- doc.setContent(resourceBinary);\r
- QDomElement docElem = doc.documentElement();\r
- \r
- // look for text tags\r
- QDomNodeList anchors = docElem.elementsByTagName("t");\r
- for (int i=0; i<anchors.length() && keepRunning; i++) {\r
- QDomElement enmedia = anchors.at(i).toElement();\r
- String weight = new String(enmedia.attribute("w"));\r
- String text = new String(enmedia.text()).toLowerCase();\r
- if (!text.equals("")) {\r
- conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
- }\r
- }\r
- \r
- if (Global.keepRunning && indexAttachmentsLocally) {\r
- indexResourceContent(guid);\r
- }\r
- \r
- if (Global.keepRunning)\r
- conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
- }\r
- \r
- private void indexResourceContent(String guid) {\r
- Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
- if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
- indexResourcePDF(r);\r
- return;\r
- }\r
- if (r.getMime().equalsIgnoreCase("application/docx") || \r
- r.getMime().equalsIgnoreCase("application/xlsx") || \r
- r.getMime().equalsIgnoreCase("application/pptx")) {\r
- indexResourceOOXML(r);\r
- return;\r
- }\r
- if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
- r.getMime().equalsIgnoreCase("application/ppt") ||\r
- r.getMime().equalsIgnoreCase("application/xls") ||\r
- r.getMime().equalsIgnoreCase("application/msg") ||\r
- r.getMime().equalsIgnoreCase("application/doc")) {\r
- indexResourceOffice(r);\r
- return;\r
- }\r
- if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
- indexResourceRTF(r);\r
- return;\r
- }\r
- if (r.getMime().equalsIgnoreCase("application/odf") ||\r
- r.getMime().equalsIgnoreCase("application/odt") ||\r
- r.getMime().equalsIgnoreCase("application/odp") ||\r
- r.getMime().equalsIgnoreCase("application/odg") ||\r
- r.getMime().equalsIgnoreCase("application/odb") ||\r
- r.getMime().equalsIgnoreCase("application/ods")) {\r
- indexResourceODF(r);\r
- return;\r
- }\r
- }\r
-\r
-\r
- private void indexResourceRTF(Resource r) {\r
-\r
- QTemporaryFile f = writeResource(r.getData());\r
- if (!keepRunning) {\r
- return;\r
- }\r
- \r
- InputStream input;\r
- try {\r
- input = new FileInputStream(new File(f.fileName()));\r
- ContentHandler textHandler = new BodyContentHandler(-1);\r
- Metadata metadata = new Metadata();\r
- RTFParser parser = new RTFParser(); \r
- ParseContext context = new ParseContext();\r
- parser.parse(input, textHandler, metadata, context);\r
- String[] result = textHandler.toString().split(regex);\r
- for (int i=0; i<result.length && keepRunning; i++) {\r
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
- }\r
- input.close();\r
- \r
- f.close();\r
- } catch (java.lang.ClassCastException e) {\r
- logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
- } catch (FileNotFoundException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (IOException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (SAXException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (TikaException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- }\r
-\r
- \r
- private void indexResourceODF(Resource r) {\r
-\r
- QTemporaryFile f = writeResource(r.getData());\r
- if (!keepRunning) {\r
- return;\r
- }\r
- \r
- InputStream input;\r
- try {\r
- input = new FileInputStream(new File(f.fileName()));\r
- ContentHandler textHandler = new BodyContentHandler(-1);\r
- Metadata metadata = new Metadata();\r
- OpenDocumentParser parser = new OpenDocumentParser(); \r
- ParseContext context = new ParseContext();\r
- parser.parse(input, textHandler, metadata, context);\r
- String[] result = textHandler.toString().split(regex);\r
- for (int i=0; i<result.length && keepRunning; i++) {\r
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
- }\r
- input.close();\r
- \r
- f.close();\r
- } catch (java.lang.ClassCastException e) {\r
- logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
- } catch (FileNotFoundException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (IOException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (SAXException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (TikaException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- }\r
-\r
- \r
- private void indexResourceOffice(Resource r) {\r
-\r
- QTemporaryFile f = writeResource(r.getData());\r
- if (!keepRunning) {\r
- return;\r
- }\r
- \r
- InputStream input;\r
- try {\r
- input = new FileInputStream(new File(f.fileName()));\r
- ContentHandler textHandler = new BodyContentHandler(-1);\r
- Metadata metadata = new Metadata();\r
- OfficeParser parser = new OfficeParser(); \r
- ParseContext context = new ParseContext();\r
- parser.parse(input, textHandler, metadata, context);\r
- String[] result = textHandler.toString().split(regex);\r
- for (int i=0; i<result.length && keepRunning; i++) {\r
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
- }\r
- input.close();\r
- \r
- f.close();\r
- } catch (java.lang.ClassCastException e) {\r
- logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
- } catch (FileNotFoundException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (IOException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (SAXException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (TikaException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- }\r
-\r
- \r
- \r
- private void indexResourcePDF(Resource r) {\r
-\r
- QTemporaryFile f = writeResource(r.getData());\r
- if (!keepRunning) {\r
- return;\r
- }\r
- \r
- InputStream input;\r
- try { \r
- input = new FileInputStream(new File(f.fileName()));\r
- ContentHandler textHandler = new BodyContentHandler(-1);\r
- Metadata metadata = new Metadata();\r
- PDFParser parser = new PDFParser(); \r
- ParseContext context = new ParseContext();\r
- parser.parse(input, textHandler, metadata, context);\r
- String[] result = textHandler.toString().split(regex);\r
- for (int i=0; i<result.length && keepRunning; i++) {\r
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
- }\r
- input.close();\r
- \r
- f.close();\r
- } catch (java.lang.ClassCastException e) {\r
- logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- } catch (SAXException e) {\r
- e.printStackTrace();\r
- } catch (TikaException e) {\r
- e.printStackTrace();\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- }\r
- \r
- \r
- private void indexResourceOOXML(Resource r) {\r
-\r
- QTemporaryFile f = writeResource(r.getData());\r
- if (!keepRunning) {\r
- return;\r
- }\r
- \r
- InputStream input;\r
- try {\r
- input = new FileInputStream(new File(f.fileName()));\r
- ContentHandler textHandler = new BodyContentHandler(-1);\r
- Metadata metadata = new Metadata();\r
- OOXMLParser parser = new OOXMLParser(); \r
- ParseContext context = new ParseContext();\r
- parser.parse(input, textHandler, metadata, context);\r
- String[] result = textHandler.toString().split(regex);\r
- for (int i=0; i<result.length && keepRunning; i++) {\r
- addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
- }\r
- input.close();\r
- \r
- f.close();\r
- } catch (java.lang.ClassCastException e) {\r
- logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
- } catch (FileNotFoundException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (IOException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (SAXException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (TikaException e) {\r
- // TODO Auto-generated catch block\r
- e.printStackTrace();\r
- } catch (Exception e) {\r
- e.printStackTrace();\r
- }\r
- }\r
- \r
-\r
- \r
- private QTemporaryFile writeResource(Data d) {\r
- QTemporaryFile newFile = new QTemporaryFile();\r
- newFile.open(OpenModeFlag.WriteOnly);\r
- newFile.write(d.getBody());\r
- newFile.close();\r
- return newFile;\r
- }\r
-\r
- \r
- private String removeEnCrypt(String content) {\r
- int index = content.indexOf("<en-crypt");\r
- int endPos;\r
- boolean tagFound = true;\r
- while (tagFound && keepRunning) {\r
- endPos = content.indexOf("</en-crypt>", index)+11;\r
- if (endPos > -1 && index > -1) {\r
- content = content.substring(0,index)+content.substring(endPos);\r
- index = content.indexOf("<en-crypt");\r
- } else {\r
- tagFound = false;\r
- }\r
- }\r
- return content;\r
- }\r
-\r
- \r
- private void addToIndex(String guid, String word, String type) {\r
- if (word.length() > 0) {\r
- // We have a good word, now let's trim off junk at the beginning or end\r
- StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
- conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
- }\r
- return;\r
- }\r
- \r
- private void scanUnindexed() {\r
- List<String> notes = conn.getNoteTable().getUnindexed();\r
- guid = null;\r
- boolean started = false;\r
- if (notes.size() > 0) {\r
- signal.indexStarted.emit();\r
- started = true;\r
- }\r
- for (int i=0; i<notes.size() && !interrupt && keepRunning; i++) {\r
- guid = notes.get(i);\r
- if (guid != null && keepRunning) {\r
- indexNoteContent();\r
- }\r
- }\r
- \r
- List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
- if (unindexedResources.size() > 0 && !started) {\r
- signal.indexStarted.emit();\r
- started = true;\r
- }\r
- for (int i=0; i<unindexedResources.size()&& !interrupt && keepRunning; i++) {\r
- guid = unindexedResources.get(i);\r
- if (keepRunning) {\r
- indexResource();\r
- }\r
- }\r
- if (started && keepRunning && !interrupt) \r
- signal.indexFinished.emit();\r
- }\r
- \r
- private void reindexNote() {\r
- if (guid == null)\r
- return;\r
- conn.getNoteTable().setIndexNeeded(guid, true);\r
- }\r
- \r
- private void reindexAll() {\r
- conn.getNoteTable().reindexAllNotes();\r
- conn.getNoteTable().noteResourceTable.reindexAll(); \r
- }\r
-\r
-}\r
+/*
+ * This file is part of NixNote/NeighborNote
+ * Copyright 2009 Randy Baumgarte
+ * Copyright 2013 Yuki Takahashi
+ *
+ * This file may be licensed under the terms of the
+ * GNU General Public License Version 2 (the ``GPL'').
+ *
+ * Software distributed under the License is distributed
+ * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied. See the GPL for the specific language
+ * governing rights and limitations.
+ *
+ * You should have received a copy of the GPL along with this
+ * program. If not, go to http://www.gnu.org/licenses/gpl.html
+ * or write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+*/
+
+package cx.fbn.nevernote.threads;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.TreeSet;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.locks.LockSupport;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
+import org.apache.tika.parser.odf.OpenDocumentParser;
+import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.parser.rtf.RTFParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.evernote.edam.type.Data;
+import com.evernote.edam.type.Resource;
+import com.trolltech.qt.core.QByteArray;
+import com.trolltech.qt.core.QIODevice.OpenModeFlag;
+import com.trolltech.qt.core.QObject;
+import com.trolltech.qt.core.QTemporaryFile;
+import com.trolltech.qt.xml.QDomDocument;
+import com.trolltech.qt.xml.QDomElement;
+import com.trolltech.qt.xml.QDomNodeList;
+
+import cx.fbn.nevernote.Global;
+import cx.fbn.nevernote.signals.IndexSignal;
+import cx.fbn.nevernote.signals.NoteResourceSignal;
+import cx.fbn.nevernote.signals.NoteSignal;
+import cx.fbn.nevernote.sql.DatabaseConnection;
+import cx.fbn.nevernote.utilities.ApplicationLogger;
+
+public class IndexRunner extends QObject implements Runnable {
+
+ // Logger for this thread; created in the constructor from the supplied log name.
+ private final ApplicationLogger logger;
+ // GUID of the item currently being processed. run() sets it from a REINDEXNOTE request
+ // (and clears it for the other request types); indexResource() reads it.
+ private String guid;
+ // Recognition data of the resource being indexed; assigned in indexResource().
+ private QByteArray resourceBinary;
+ // Signal objects; both are instantiated at the start of run().
+ public volatile NoteSignal noteSignal;
+ public volatile NoteResourceSignal resourceSignal;
+ // Operation selected for the current request: SCAN, REINDEXALL or REINDEXNOTE.
+ // run() resets it to 0 through setIndexType(0) after a SCAN or REINDEXALL completes.
+ private int indexType;
+ // Operation codes compared against indexType in run().
+ public final int SCAN=1; 
+ public final int REINDEXALL=2;
+ public final int REINDEXNOTE=3;
+ // Main-loop flag of run(); cleared when a STOP request is taken from the work queue.
+ public boolean keepRunning;
+ // DOM document that indexResource() loads the recognition data into.
+ private final QDomDocument doc;
+ // Regular expression used to split extracted text into words.
+ private static String regex = Global.getWordRegex();
+ // NOTE(review): not referenced in the visible part of this file; purpose to be confirmed.
+ public String specialIndexCharacters = "";
+// public boolean indexNoteBody = true;
+// public boolean indexNoteTitle = true;
+ // When false, indexResource() ignores the resource's recognition data.
+ public boolean indexImageRecognition = true;
+ // Database connection owned by this thread; opened in the constructor, shut down at the end of run().
+ private final DatabaseConnection conn;
+ // Pending request strings ("SCAN", "REINDEXALL", "REINDEXNOTE <guid>", "STOP") consumed by run().
+ private volatile LinkedBlockingQueue<String> workQueue;
+ // Capacity given to workQueue in the constructor.
+ private static int MAX_QUEUED_WAITING = 1000;
+ // Reset to false when a SCAN request is accepted; the indexing loops call processInterrupt() while it is true.
+ public boolean interrupt;
+ // True while run() is waiting for the next request, false while a request is being processed.
+ public boolean idle;
+ // When true, indexResource() also indexes the attachment's own content via indexResourceContent().
+ public boolean indexAttachmentsLocally = true;
+ // Index signal object; instantiated at the start of run().
+ public volatile IndexSignal signal;
+ // Cleared at the start of indexResource(); other uses are outside the visible part of this file.
+ private final TreeSet<String> foundWords;
+ // Number of index rows written since the last commit; reset to 0 after each commitTransaction().
+ int uncommittedCount = 0;
+
+ /**
+  * Creates the index runner together with its own logger and database connection.
+  * ICHANGED: the String parameter {@code b} was added.
+  *
+  * @param logname name handed to the {@link ApplicationLogger} created for this thread
+  * @param u       forwarded unchanged to {@link DatabaseConnection}
+  * @param i       forwarded unchanged to {@link DatabaseConnection}
+  * @param r       forwarded unchanged to {@link DatabaseConnection}
+  * @param b       forwarded unchanged to {@link DatabaseConnection}
+  * @param uid     forwarded unchanged to {@link DatabaseConnection}
+  * @param pswd    forwarded unchanged to {@link DatabaseConnection}
+  * @param cpswd   forwarded unchanged to {@link DatabaseConnection}
+  */
+ public IndexRunner(String logname, String u, String i, String r, String b, String uid, String pswd, String cpswd) {
+     this.foundWords = new TreeSet<String>();
+
+     // The logger must exist first because the database connection logs through it.
+     this.logger = new ApplicationLogger(logname);
+     // ICHANGED: b was added to the argument list.
+     // NOTE(review): the meaning of the trailing 500 is defined by DatabaseConnection - confirm there.
+     this.conn = new DatabaseConnection(this.logger, u, i, r, b, uid, pswd, cpswd, 500);
+
+     // Initial state: scan mode, nothing selected, main loop enabled.
+     this.indexType = SCAN;
+     this.guid = null;
+     this.keepRunning = true;
+
+     this.doc = new QDomDocument();
+     this.workQueue = new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);
+ }
+
+ /**
+  * Selects the operation the runner should perform.
+  *
+  * @param t one of {@code SCAN}, {@code REINDEXALL}, {@code REINDEXNOTE}; {@code run()} passes 0
+  *          once a scan or a full reindex has finished
+  */
+ public void setIndexType(int t) {
+     this.indexType = t;
+ }
+
+
+ /**
+  * Thread entry point of the indexer.
+  *
+  * Lowers the thread priority, creates the signal objects and then loops while
+  * keepRunning is set: commit the open transaction, block until a request string
+  * arrives on the work queue, decode the request by its prefix and execute it.
+  * A "STOP" request clears keepRunning; once the loop ends the database
+  * connection is shut down.
+  */
+ @Override
+ public void run() {
+ thread().setPriority(Thread.MIN_PRIORITY);
+ noteSignal = new NoteSignal();
+ resourceSignal = new NoteResourceSignal();
+ signal = new IndexSignal();
+ logger.log(logger.EXTREME, "Starting index thread ");
+ while (keepRunning) {
+ idle=true;
+ try {
+ // Flush whatever the previous request left uncommitted before going to sleep on the queue.
+ conn.commitTransaction();
+ uncommittedCount = 0;
+ // Blocks until a request is available.
+ String work = workQueue.take();
+ idle=false;
+ if (work.startsWith("SCAN")) {
+ guid=null;
+ // A new scan request also clears a pending interrupt.
+ interrupt = false;
+ indexType = SCAN;
+ }
+ if (work.startsWith("REINDEXALL")) {
+ guid = null;
+ indexType=REINDEXALL;
+ }
+ if (work.startsWith("REINDEXNOTE")) {
+ // Everything after the "REINDEXNOTE " prefix is taken as the GUID to reindex.
+ work = work.replace("REINDEXNOTE ", "");
+ guid = work;
+ indexType = REINDEXNOTE;
+ }
+ // Evaluated on the possibly rewritten request string from the branch above.
+ if (work.startsWith("STOP")) {
+ keepRunning = false;
+ guid = null;
+ }
+ logger.log(logger.EXTREME, "Type:" +indexType);
+ // keepRunning is re-checked before every operation because STOP (or another thread) may have cleared it.
+ if (indexType == SCAN && keepRunning) {
+ logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");
+ scanUnindexed();
+ // Reset so the checks below do not fire for this request.
+ setIndexType(0);
+ }
+ if (indexType == REINDEXALL && keepRunning) {
+ logger.log(logger.MEDIUM, "Marking all for reindex");
+ reindexAll();
+ setIndexType(0);
+ }
+ // NOTE(review): unlike the two branches above, indexType is not reset after reindexNote().
+ if (indexType == REINDEXNOTE && keepRunning) {
+ reindexNote();
+ }
+ } catch (InterruptedException e) {
+ // The interruption is only logged; the loop continues for as long as keepRunning is set.
+ logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());
+ }
+ }
+ logger.log(logger.EXTREME, "Shutting down database");
+ conn.dbShutdown();
+ logger.log(logger.EXTREME, "Database shut down. Exiting thread");
+ }
+
+ // Reindex a note
+// public void indexNoteContent() {
+// foundWords.clear();
+//
+// logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
+//
+// logger.log(logger.EXTREME, "Getting note content");
+// Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
+// String data;
+// if (indexNoteBody) {
+// data = n.getContent();
+// data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
+//
+// logger.log(logger.EXTREME, "Removing any encrypted data");
+// data = removeEnCrypt(data.toString());
+// logger.log(logger.EXTREME, "Removing xml markups");
+// } else
+// data = "";
+// String text;
+// if (indexNoteTitle)
+// text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
+// else
+// text = removeTags(StringEscapeUtils.unescapeHtml4(data));
+//
+// logger.log(logger.EXTREME, "Splitting words");
+// String[] result = text.toString().split(regex);
+// conn.commitTransaction();
+// conn.beginTransaction();
+// logger.log(logger.EXTREME, "Deleting existing words for note from index");
+// conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
+//
+// logger.log(logger.EXTREME, "Number of words found: " +result.length);
+// for (int j=0; j<result.length && keepRunning; j++) {
+// if (interrupt) {
+// processInterrupt();
+// }
+// if (!result[j].trim().equals("")) {
+// logger.log(logger.EXTREME, "Result word: " +result[j].trim());
+// addToIndex(guid, result[j], "CONTENT");
+// }
+// }
+//
+// // Add tags
+// for (int j=0; j<n.getTagNamesSize(); j++) {
+// if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
+// addToIndex(guid, n.getTagNames().get(j), "CONTENT");
+// }
+//
+// // If we were interrupted, we will reindex this note next time
+// if (Global.keepRunning) {
+// logger.log(logger.EXTREME, "Resetting note guid needed");
+// conn.getNoteTable().setIndexNeeded(guid, false);
+// }
+// conn.commitTransaction();
+// uncommittedCount = 0;
+// logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
+// }
+
+
+ /**
+  * Removes markup from a string by scanning it from the end towards the start.
+  *
+  * Going backwards, a '>' opens a tag and the next '<' closes it; both delimiters
+  * and everything between them are dropped. A '<' outside a tag is dropped as well,
+  * and a '>' without a matching '<' removes everything in front of it.
+  *
+  * The surviving characters are collected in one pass into a char array that is
+  * filled from the back, so the work is linear in the text length instead of
+  * shifting the buffer tail on every deleted character.
+  *
+  * @param text text that may contain markup; must not be null
+  * @return the text with all tags removed
+  */
+ private String removeTags(String text) {
+     final int length = text.length();
+     final char[] kept = new char[length];
+     // Write position moves downwards so the kept characters end up in original order.
+     int writePos = length;
+     boolean inTag = false;
+     for (int i = length - 1; i >= 0; i--) {
+         final char c = text.charAt(i);
+         if (c == '>')
+             inTag = true;
+         if (c == '<')
+             inTag = false;
+         // Keep only characters outside a tag; the opening '<' itself is never kept.
+         if (!inTag && c != '<')
+             kept[--writePos] = c;
+     }
+
+     return new String(kept, writePos, length - writePos);
+ }
+
+
+ /**
+  * Queues a request for the index thread, but only when nothing else is waiting.
+  *
+  * @param request request string, e.g. "SCAN", "REINDEXALL", "REINDEXNOTE <guid>" or "STOP"
+  * @return true when the queue was empty and the request was offered to it,
+  *         false when a request was already pending and this one was dropped
+  */
+ public synchronized boolean addWork(String request) {
+     final boolean queueWasEmpty = workQueue.size() == 0;
+     if (queueWasEmpty) {
+         workQueue.offer(request);
+     }
+     return queueWasEmpty;
+ }
+
+ /**
+  * Reports how many requests are currently waiting in the work queue.
+  *
+  * @return number of queued requests
+  */
+ public synchronized int getWorkQueueSize() {
+     final int pendingRequests = workQueue.size();
+     return pendingRequests;
+ }
+
+ /**
+  * Rebuilds the word index entries for the resource identified by {@code guid}.
+  *
+  * Steps: load the resource's recognition data, remove the existing "RESOURCE"
+  * index rows, add every recognised text element ("t" elements, weighted by their
+  * "w" attribute), optionally index the attachment content itself, and finally
+  * clear the resource's index-needed flag.
+  *
+  * Does nothing when {@code guid} is null. When the recognition record cannot be
+  * loaded, the note-level steps that need it are skipped instead of failing with a
+  * NullPointerException; the remaining steps still run.
+  */
+ public void indexResource() {
+
+     if (guid == null)
+         return;
+     foundWords.clear();
+     Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);
+     if (r == null)
+         logger.log(logger.LOW, "No recognition record found for resource " +guid);
+     if (!indexImageRecognition ||
+             r == null || r.getRecognition() == null ||
+             r.getRecognition().getBody() == null ||
+             r.getRecognition().getBody().length == 0)
+         resourceBinary = new QByteArray(" ");
+     else
+         resourceBinary = new QByteArray(r.getRecognition().getBody());
+
+     conn.commitTransaction();
+     conn.beginTransaction();
+     // The owning note is only known when the record was loaded.
+     if (r != null)
+         conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");
+     // This is due to an old bug & can be removed at some point in the future 11/23/2010
+     conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");
+     conn.commitTransaction();
+     uncommittedCount = 0;
+     conn.beginTransaction();
+
+     doc.setContent(resourceBinary);
+     QDomElement docElem = doc.documentElement();
+
+     // look for text tags
+     QDomNodeList anchors = docElem.elementsByTagName("t");
+     for (int i=0; r != null && i<anchors.length() && keepRunning; i++) {
+         if (interrupt) {
+             processInterrupt();
+         }
+         QDomElement enmedia = anchors.at(i).toElement();
+         String weight = new String(enmedia.attribute("w"));
+         String text = new String(enmedia.text()).toLowerCase();
+         if (!text.equals("")) {
+             conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", Integer.valueOf(weight));
+             uncommittedCount++;
+             // Commit in batches so a single resource does not build one huge transaction.
+             if (uncommittedCount > 100) {
+                 conn.commitTransaction();
+                 uncommittedCount=0;
+             }
+         }
+     }
+
+     if (Global.keepRunning && indexAttachmentsLocally) {
+         conn.commitTransaction();
+         uncommittedCount = 0;
+         conn.beginTransaction();
+         indexResourceContent(guid);
+     }
+
+     // If we are shutting down the flag stays set, so the resource is indexed again next time.
+     if (Global.keepRunning)
+         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);
+     conn.commitTransaction();
+     uncommittedCount = 0;
+ }
+
+ /**
+  * Loads a resource and hands it to the content indexer that matches its MIME type.
+  * Resources that cannot be loaded, have no MIME type, or have a type that is not
+  * listed here are ignored.
+  *
+  * @param guid GUID of the resource whose attachment content should be indexed
+  */
+ private void indexResourceContent(String guid) {
+     final Resource res = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);
+     if (res == null || res.getMime() == null) {
+         return;
+     }
+
+     final String mime = res.getMime();
+     if (mime.equalsIgnoreCase("application/pdf")) {
+         indexResourcePDF(res);
+     } else if (mime.equalsIgnoreCase("application/docx")
+             || mime.equalsIgnoreCase("application/xlsx")
+             || mime.equalsIgnoreCase("application/pptx")) {
+         // Office Open XML family
+         indexResourceOOXML(res);
+     } else if (mime.equalsIgnoreCase("application/vsd")
+             || mime.equalsIgnoreCase("application/ppt")
+             || mime.equalsIgnoreCase("application/xls")
+             || mime.equalsIgnoreCase("application/msg")
+             || mime.equalsIgnoreCase("application/doc")) {
+         // Legacy binary Microsoft Office formats
+         indexResourceOffice(res);
+     } else if (mime.equalsIgnoreCase("application/rtf")) {
+         indexResourceRTF(res);
+     } else if (mime.equalsIgnoreCase("application/odf")
+             || mime.equalsIgnoreCase("application/odt")
+             || mime.equalsIgnoreCase("application/odp")
+             || mime.equalsIgnoreCase("application/odg")
+             || mime.equalsIgnoreCase("application/odb")
+             || mime.equalsIgnoreCase("application/ods")) {
+         // OpenDocument family
+         indexResourceODF(res);
+     }
+ }
+
+
+ /**
+  * Extracts the text of an RTF attachment with Tika and adds every word to the
+  * "RESOURCE" index of the owning note.
+  *
+  * The attachment body is written to a temporary file, parsed, split with the word
+  * regex and indexed word by word. Failures are logged and otherwise ignored, so a
+  * broken attachment never stops the index thread. The input stream and the
+  * temporary file are released on every path, including when parsing fails.
+  *
+  * @param r resource whose data should be indexed; resources without data are skipped
+  */
+ private void indexResourceRTF(Resource r) {
+
+     Data d = r.getData();
+     for (int i=0; i<20 && d != null && d.getSize() == 0; i++)
+         d = r.getData();
+     if (d == null || d.getSize()== 0)
+         return;
+
+     QTemporaryFile f = writeResource(d);
+     if (!keepRunning) {
+         return;
+     }
+
+     InputStream input = null;
+     try {
+         input = new FileInputStream(new File(f.fileName()));
+         ContentHandler textHandler = new BodyContentHandler(-1);
+         Metadata metadata = new Metadata();
+         RTFParser parser = new RTFParser();
+         ParseContext context = new ParseContext();
+         parser.parse(input, textHandler, metadata, context);
+         String[] result = textHandler.toString().split(regex);
+         for (int i=0; i<result.length && keepRunning; i++) {
+             // Same interrupt handling as the ODF and Office indexers.
+             if (interrupt) {
+                 processInterrupt();
+             }
+             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+         }
+     } catch (java.lang.ClassCastException e) {
+         logger.log(logger.LOW, "Cast exception: " +e.getMessage());
+     } catch (FileNotFoundException e) {
+         logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
+     } catch (IOException e) {
+         logger.log(logger.LOW, "IO exception: " +e.getMessage());
+     } catch (SAXException e) {
+         logger.log(logger.LOW, "SAX exception: " +e.getMessage());
+     } catch (TikaException e) {
+         logger.log(logger.LOW, "Tika exception: " +e.getMessage());
+     } catch (Exception e) {
+         logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
+     } catch (java.lang.NoSuchMethodError e) {
+         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
+     } catch (Error e) {
+         logger.log(logger.LOW, "Unknown error: " +e.getMessage());
+     } finally {
+         // The parser does not close the stream, so close it here even when parsing failed.
+         if (input != null) {
+             try {
+                 input.close();
+             } catch (IOException e) {
+                 logger.log(logger.LOW, "IO exception: " +e.getMessage());
+             }
+         }
+         f.close();
+     }
+ }
+
+
+ /**
+  * Extracts the text of an OpenDocument attachment with Tika and adds every word to
+  * the "RESOURCE" index of the owning note.
+  *
+  * The attachment body is written to a temporary file, parsed, split with the word
+  * regex and indexed word by word. Failures are logged and otherwise ignored, so a
+  * broken attachment never stops the index thread. The input stream and the
+  * temporary file are released on every path, including when parsing fails.
+  *
+  * @param r resource whose data should be indexed; resources without data are skipped
+  */
+ private void indexResourceODF(Resource r) {
+
+     Data d = r.getData();
+     for (int i=0; i<20 && d != null && d.getSize() == 0; i++)
+         d = r.getData();
+     if (d == null || d.getSize()== 0)
+         return;
+     QTemporaryFile f = writeResource(d);
+     if (!keepRunning) {
+         return;
+     }
+
+     InputStream input = null;
+     try {
+         input = new FileInputStream(new File(f.fileName()));
+         ContentHandler textHandler = new BodyContentHandler(-1);
+         Metadata metadata = new Metadata();
+         OpenDocumentParser parser = new OpenDocumentParser();
+         ParseContext context = new ParseContext();
+         parser.parse(input, textHandler, metadata, context);
+         String[] result = textHandler.toString().split(regex);
+         for (int i=0; i<result.length && keepRunning; i++) {
+             if (interrupt) {
+                 processInterrupt();
+             }
+             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+         }
+     } catch (java.lang.ClassCastException e) {
+         logger.log(logger.LOW, "Cast exception: " +e.getMessage());
+     } catch (FileNotFoundException e) {
+         logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
+     } catch (IOException e) {
+         logger.log(logger.LOW, "IO exception: " +e.getMessage());
+     } catch (SAXException e) {
+         logger.log(logger.LOW, "SAX exception: " +e.getMessage());
+     } catch (TikaException e) {
+         logger.log(logger.LOW, "Tika exception: " +e.getMessage());
+     } catch (Exception e) {
+         logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
+     } catch (java.lang.NoSuchMethodError e) {
+         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
+     } catch (Error e) {
+         logger.log(logger.LOW, "Unknown error: " +e.getMessage());
+     } finally {
+         // The parser does not close the stream, so close it here even when parsing failed.
+         if (input != null) {
+             try {
+                 input.close();
+             } catch (IOException e) {
+                 logger.log(logger.LOW, "IO exception: " +e.getMessage());
+             }
+         }
+         f.close();
+     }
+ }
+
+
+    /**
+     * Extracts the text of a Microsoft Office resource with Tika's
+     * OfficeParser and adds each token to the word index under the GUID of
+     * the note that owns the resource.
+     *
+     * The resource body is first written to a temporary file, parsed from
+     * there, and the extracted text is split with the class-level
+     * <code>regex</code> pattern.  All parser failures are logged at LOW
+     * level and otherwise ignored, so one bad attachment cannot stop the
+     * indexing run.
+     *
+     * @param r the resource to index; nothing is indexed when its data is
+     *          missing or empty
+     */
+    private void indexResourceOffice(Resource r) {
+
+        Data d = r.getData();
+        // Re-read the data up to 20 times while it reports a size of zero.
+        // NOTE(review): presumably the data may be populated late -- confirm.
+        for (int i=0; i<20 && d != null && d.getSize() == 0; i++)
+            d = r.getData();
+        if (d == null || d.getSize() == 0)
+            return;
+        QTemporaryFile f = writeResource(d);
+        if (!keepRunning) {
+            return;
+        }
+
+        InputStream input = null;
+        try {
+            input = new FileInputStream(new File(f.fileName()));
+            // -1 asks BodyContentHandler not to cap the amount of extracted text
+            ContentHandler textHandler = new BodyContentHandler(-1);
+            Metadata metadata = new Metadata();
+            OfficeParser parser = new OfficeParser();
+            ParseContext context = new ParseContext();
+            parser.parse(input, textHandler, metadata, context);
+            String[] result = textHandler.toString().split(regex);
+            for (int i=0; i<result.length && keepRunning; i++) {
+                if (interrupt) {
+                    processInterrupt();
+                }
+                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+            }
+        } catch (java.lang.ClassCastException e) {
+            logger.log(logger.LOW, "Cast exception: " +e.getMessage());
+        } catch (FileNotFoundException e) {
+            logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
+        } catch (IOException e) {
+            logger.log(logger.LOW, "IO exception: " +e.getMessage());
+        } catch (SAXException e) {
+            logger.log(logger.LOW, "SAX exception: " +e.getMessage());
+        } catch (TikaException e) {
+            logger.log(logger.LOW, "Tika exception: " +e.getMessage());
+        } catch (Exception e) {
+            logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
+        } catch (java.lang.NoSuchMethodError e) {
+            logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
+        } catch (Error e) {
+            logger.log(logger.LOW, "Unknown error: " +e.getMessage());
+        } finally {
+            // Release the stream and the temporary file on every path, so a
+            // failed parse does not leave an open handle behind.
+            if (input != null) {
+                try {
+                    input.close();
+                } catch (IOException e) {
+                    logger.log(logger.LOW, "IO exception: " +e.getMessage());
+                }
+            }
+            f.close();
+        }
+    }
+
+
+
+    /**
+     * Extracts the text of a PDF resource with Tika's PDFParser and adds
+     * each token to the word index under the GUID of the note that owns the
+     * resource.
+     *
+     * The resource body is first written to a temporary file, parsed from
+     * there, and the extracted text is split with the class-level
+     * <code>regex</code> pattern.  All parser failures are logged at LOW
+     * level and otherwise ignored, so one bad attachment cannot stop the
+     * indexing run.
+     *
+     * @param r the resource to index; nothing is indexed when its data is
+     *          missing or empty
+     */
+    private void indexResourcePDF(Resource r) {
+
+        Data d = r.getData();
+        // Re-read the data up to 20 times while it reports a size of zero.
+        // NOTE(review): presumably the data may be populated late -- confirm.
+        for (int i=0; i<20 && d != null && d.getSize() == 0; i++)
+            d = r.getData();
+        if (d == null || d.getSize() == 0)
+            return;
+        QTemporaryFile f = writeResource(d);
+        if (!keepRunning) {
+            return;
+        }
+
+        InputStream input = null;
+        try {
+            input = new FileInputStream(new File(f.fileName()));
+            // -1 asks BodyContentHandler not to cap the amount of extracted text
+            ContentHandler textHandler = new BodyContentHandler(-1);
+            Metadata metadata = new Metadata();
+            PDFParser parser = new PDFParser();
+            ParseContext context = new ParseContext();
+            parser.parse(input, textHandler, metadata, context);
+            String[] result = textHandler.toString().split(regex);
+            for (int i=0; i<result.length && keepRunning; i++) {
+                if (interrupt) {
+                    processInterrupt();
+                }
+                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+            }
+        } catch (java.lang.ClassCastException e) {
+            logger.log(logger.LOW, "Cast exception: " +e.getMessage());
+        } catch (FileNotFoundException e) {
+            logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
+        } catch (IOException e) {
+            logger.log(logger.LOW, "IO exception: " +e.getMessage());
+        } catch (SAXException e) {
+            logger.log(logger.LOW, "SAX exception: " +e.getMessage());
+        } catch (TikaException e) {
+            logger.log(logger.LOW, "Tika exception: " +e.getMessage());
+        } catch (Exception e) {
+            logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
+        } catch (java.lang.NoSuchMethodError e) {
+            logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
+        } catch (Error e) {
+            logger.log(logger.LOW, "Unknown error: " +e.getMessage());
+        } finally {
+            // Release the stream and the temporary file on every path, so a
+            // failed parse does not leave an open handle behind.
+            if (input != null) {
+                try {
+                    input.close();
+                } catch (IOException e) {
+                    logger.log(logger.LOW, "IO exception: " +e.getMessage());
+                }
+            }
+            f.close();
+        }
+    }
+
+
+    /**
+     * Extracts the text of an Office Open XML resource with Tika's
+     * OOXMLParser and adds each token to the word index under the GUID of
+     * the note that owns the resource.
+     *
+     * The resource body is first written to a temporary file, parsed from
+     * there, and the extracted text is split with the class-level
+     * <code>regex</code> pattern.  All parser failures are logged at LOW
+     * level and otherwise ignored, so one bad attachment cannot stop the
+     * indexing run.
+     *
+     * @param r the resource to index; nothing is indexed when its data is
+     *          missing or empty
+     */
+    private void indexResourceOOXML(Resource r) {
+
+        Data d = r.getData();
+        // Re-read the data up to 20 times while it reports a size of zero.
+        // NOTE(review): presumably the data may be populated late -- confirm.
+        for (int i=0; i<20 && d != null && d.getSize() == 0; i++)
+            d = r.getData();
+        if (d == null || d.getSize() == 0)
+            return;
+        QTemporaryFile f = writeResource(d);
+        if (!keepRunning) {
+            return;
+        }
+
+        InputStream input = null;
+        try {
+            input = new FileInputStream(new File(f.fileName()));
+            // -1 asks BodyContentHandler not to cap the amount of extracted text
+            ContentHandler textHandler = new BodyContentHandler(-1);
+            Metadata metadata = new Metadata();
+            OOXMLParser parser = new OOXMLParser();
+            ParseContext context = new ParseContext();
+            parser.parse(input, textHandler, metadata, context);
+            String[] result = textHandler.toString().split(regex);
+            for (int i=0; i<result.length && keepRunning; i++) {
+                if (interrupt) {
+                    processInterrupt();
+                }
+                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+            }
+        } catch (java.lang.ClassCastException e) {
+            logger.log(logger.LOW, "Cast exception: " +e.getMessage());
+        } catch (FileNotFoundException e) {
+            logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
+        } catch (IOException e) {
+            logger.log(logger.LOW, "IO exception: " +e.getMessage());
+        } catch (SAXException e) {
+            logger.log(logger.LOW, "SAX exception: " +e.getMessage());
+        } catch (TikaException e) {
+            logger.log(logger.LOW, "Tika exception: " +e.getMessage());
+        } catch (Exception e) {
+            logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
+        } catch (java.lang.NoSuchMethodError e) {
+            logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
+        } catch (Error e) {
+            logger.log(logger.LOW, "Unknown error: " +e.getMessage());
+        } finally {
+            // Release the stream and the temporary file on every path, so a
+            // failed parse does not leave an open handle behind.
+            if (input != null) {
+                try {
+                    input.close();
+                } catch (IOException e) {
+                    logger.log(logger.LOW, "IO exception: " +e.getMessage());
+                }
+            }
+            f.close();
+        }
+    }
+
+
+
+    /**
+     * Copies the body of a resource into a new temporary file.
+     *
+     * The file is opened write-only, filled, and closed again before it is
+     * handed back, so callers receive a closed file they can reopen by name.
+     *
+     * @param d the resource data whose body is written out
+     * @return the temporary file holding the body
+     */
+    private QTemporaryFile writeResource(Data d) {
+        QTemporaryFile scratch = new QTemporaryFile();
+
+        scratch.open(OpenModeFlag.WriteOnly);
+        scratch.write(d.getBody());
+        scratch.close();
+
+        return scratch;
+    }
+
+
+ private String removeEnCrypt(String content) {
+ int index = content.indexOf("<en-crypt");
+ int endPos;
+ boolean tagFound = true;
+ while (tagFound && keepRunning) {
+ if (interrupt) {
+ processInterrupt();
+ }
+ endPos = content.indexOf("</en-crypt>", index)+11;
+ if (endPos > -1 && index > -1) {
+ content = content.substring(0,index)+content.substring(endPos);
+ index = content.indexOf("<en-crypt");
+ } else {
+ tagFound = false;
+ }
+ }
+ return content;
+ }
+
+
+    /**
+     * Normalises a word and records it in the word index for a note.
+     *
+     * The word is lower-cased; trailing characters that are neither
+     * letters/digits nor listed in <code>specialIndexCharacters</code> are
+     * removed, then leading characters that are not letters/digits are
+     * removed.  Words already present in <code>foundWords</code> (raw or
+     * normalised) are skipped.  The open transaction is committed after
+     * more than 100 uncommitted additions.
+     *
+     * @param guid the GUID the word is indexed under
+     * @param word the raw word as extracted from the content
+     * @param type the source type stored with the index entry
+     */
+    private void addToIndex(String guid, String word, String type) {
+        if (foundWords.contains(word)) {
+            return;
+        }
+
+        StringBuilder cleaned = new StringBuilder(word.toLowerCase());
+
+        // Strip junk from the end, keeping the special index characters
+        while (cleaned.length() > 0) {
+            char last = cleaned.charAt(cleaned.length() - 1);
+            if (Character.isLetterOrDigit(last) || specialIndexCharacters.indexOf(last) != -1) {
+                break;
+            }
+            cleaned.setLength(cleaned.length() - 1);
+        }
+
+        // Strip junk from the front by working on the reversed text
+        cleaned.reverse();
+        while (cleaned.length() > 0
+                && !Character.isLetterOrDigit(cleaned.charAt(cleaned.length() - 1))) {
+            cleaned.setLength(cleaned.length() - 1);
+        }
+        cleaned.reverse();
+
+        String token = cleaned.toString();
+        if (token.length() == 0 || foundWords.contains(token)) {
+            return;
+        }
+
+        // Remember both forms so neither is processed again
+        foundWords.add(token);
+        foundWords.add(word);
+        conn.getWordsTable().addWordToNoteIndex(guid, token, type, 100);
+
+        uncommittedCount++;
+        if (uncommittedCount > 100) {
+            conn.commitTransaction();
+            uncommittedCount = 0;
+        }
+    }
+
+    /**
+     * Indexes every resource reported as unindexed, then removes index
+     * entries whose GUID no longer exists in the note table.
+     *
+     * Emits <code>signal.indexStarted</code> when there is at least one
+     * resource to process and <code>signal.indexFinished</code> afterwards,
+     * provided the thread is still meant to run.  Both loops stop as soon
+     * as <code>keepRunning</code> is cleared.
+     */
+    private void scanUnindexed() {
+        // Scanning of unindexed note content (getNoteTable().getUnindexed()
+        // followed by indexNoteContent() for each GUID) is disabled in this
+        // method; only resources are processed here.
+        guid = null;
+
+        List<String> pendingResources = conn.getNoteTable().noteResourceTable.getUnindexed();
+        boolean started = !pendingResources.isEmpty();
+        if (started) {
+            signal.indexStarted.emit();
+        }
+
+        for (String resourceGuid : pendingResources) {
+            if (!keepRunning) {
+                break;
+            }
+            if (interrupt) {
+                processInterrupt();
+            }
+            guid = resourceGuid;
+            // processInterrupt() may have taken a while; check again
+            if (keepRunning) {
+                indexResource();
+            }
+        }
+
+        // Cleanup stuff that was deleted at some point
+        List<String> indexedGuids = conn.getWordsTable().getGuidList();
+        logger.log(logger.LOW, "GUIDS in index: " +indexedGuids.size());
+        for (String indexedGuid : indexedGuids) {
+            if (!keepRunning) {
+                break;
+            }
+            if (conn.getNoteTable().exists(indexedGuid)) {
+                continue;
+            }
+            logger.log(logger.LOW, "Old GUID found: " +indexedGuid);
+            conn.getWordsTable().expunge(indexedGuid);
+        }
+
+        if (started && keepRunning) {
+            signal.indexFinished.emit();
+        }
+    }
+
+    /**
+     * Flags the note identified by the current <code>guid</code> field as
+     * needing to be indexed.  Does nothing when no GUID is set.
+     */
+    private void reindexNote() {
+        if (guid != null) {
+            conn.getNoteTable().setIndexNeeded(guid, true);
+        }
+    }
+
+    /**
+     * Requests a reindex of all notes and then of all note resources by
+     * delegating to the note table and its resource table.
+     */
+    private void reindexAll() {
+        // Notes first, then the resources attached to them
+        conn.getNoteTable().reindexAllNotes();
+        conn.getNoteTable().noteResourceTable.reindexAll();
+    }
+
+    /**
+     * Pauses the calling thread for approximately the requested number of
+     * seconds.
+     *
+     * The wait is measured against a System.nanoTime() deadline and the
+     * thread is re-parked if LockSupport.parkNanos() returns before the
+     * deadline.  The wait ends early if the thread is interrupted.
+     *
+     * @param len number of seconds to wait; zero or negative values return
+     *            immediately
+     */
+    private void waitSeconds(int len) {
+        if (len <= 0) {
+            return;
+        }
+        // Honour the requested duration (long arithmetic avoids int overflow)
+        long deadline = System.nanoTime() + len * 1000000000L;
+        long remaining = deadline - System.nanoTime();
+        while (remaining > 0) {
+            // parkNanos() returns at once for an interrupted thread; leave
+            // instead of spinning until the deadline.
+            if (Thread.currentThread().isInterrupted()) {
+                break;
+            }
+            LockSupport.parkNanos(remaining);
+            remaining = deadline - System.nanoTime();
+        }
+    }
+
+    /**
+     * Handles a pending interrupt request: commits the open transaction,
+     * pauses via waitSeconds(1), resets the uncommitted-word counter, opens
+     * a new transaction and clears the <code>interrupt</code> flag.
+     */
+    private void processInterrupt() {
+        // Flush pending index work before pausing
+        conn.commitTransaction();
+        waitSeconds(1);
+
+        // Start over with an empty transaction
+        uncommittedCount = 0;
+        conn.beginTransaction();
+
+        // Request has been serviced
+        interrupt = false;
+    }
+
+}