2 * This file is part of NixNote/NeighborNote
3 * Copyright 2009 Randy Baumgarte
4 * Copyright 2013 Yuki Takahashi
6 * This file may be licensed under the terms of the
7 * GNU General Public License Version 2 (the ``GPL'').
9 * Software distributed under the License is distributed
10 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
11 * express or implied. See the GPL for the specific language
12 * governing rights and limitations.
14 * You should have received a copy of the GPL along with this
15 * program. If not, go to http://www.gnu.org/licenses/gpl.html
16 * or write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 package cx.fbn.nevernote.threads;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.util.List;
29 import java.util.TreeSet;
30 import java.util.concurrent.LinkedBlockingQueue;
31 import java.util.concurrent.locks.LockSupport;
33 import org.apache.tika.exception.TikaException;
34 import org.apache.tika.metadata.Metadata;
35 import org.apache.tika.parser.ParseContext;
36 import org.apache.tika.parser.microsoft.OfficeParser;
37 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
38 import org.apache.tika.parser.odf.OpenDocumentParser;
39 import org.apache.tika.parser.pdf.PDFParser;
40 import org.apache.tika.parser.rtf.RTFParser;
41 import org.apache.tika.sax.BodyContentHandler;
42 import org.xml.sax.ContentHandler;
43 import org.xml.sax.SAXException;
45 import com.evernote.edam.type.Data;
46 import com.evernote.edam.type.Resource;
47 import com.trolltech.qt.core.QByteArray;
48 import com.trolltech.qt.core.QIODevice.OpenModeFlag;
49 import com.trolltech.qt.core.QObject;
50 import com.trolltech.qt.core.QTemporaryFile;
51 import com.trolltech.qt.xml.QDomDocument;
52 import com.trolltech.qt.xml.QDomElement;
53 import com.trolltech.qt.xml.QDomNodeList;
55 import cx.fbn.nevernote.Global;
56 import cx.fbn.nevernote.signals.IndexSignal;
57 import cx.fbn.nevernote.signals.NoteResourceSignal;
58 import cx.fbn.nevernote.signals.NoteSignal;
59 import cx.fbn.nevernote.sql.DatabaseConnection;
60 import cx.fbn.nevernote.utilities.ApplicationLogger;
// Background worker (QObject + Runnable) that takes text requests from a work
// queue and maintains the word index through its own DatabaseConnection.
// NOTE(review): a `guid` variable is used by several methods of this class but
// its declaration is not visible in this listing (the listing's own numbering
// has gaps) -- confirm against the full file.
62 public class IndexRunner extends QObject implements Runnable {
// Logger created in the constructor from the supplied log name.
64 private final ApplicationLogger logger;
// Recognition data (or a single-space placeholder) that indexResource() loads into `doc`.
66 private QByteArray resourceBinary;
// Signal objects; they are assigned next to the "Starting index thread" log
// call rather than in the constructor.
67 public volatile NoteSignal noteSignal;
68 public volatile NoteResourceSignal resourceSignal;
// Request type that is compared against SCAN / REINDEXALL / REINDEXNOTE after a work item is dequeued.
69 private int indexType;
// Values stored in indexType.
70 public final int SCAN=1;
71 public final int REINDEXALL=2;
72 public final int REINDEXNOTE=3;
// Loop guard read by the main work loop and by the per-word indexing loops.
73 public boolean keepRunning;
// DOM document that indexResource() fills from resourceBinary.
74 private final QDomDocument doc;
// Pattern handed to String.split() to break extracted text into words.
75 private static String regex = Global.getWordRegex();
// Non-alphanumeric characters that addToIndex()'s first clean-up pass leaves in place.
76 public String specialIndexCharacters = "";
77 // public boolean indexNoteBody = true;
78 // public boolean indexNoteTitle = true;
// When false, indexResource() ignores recognition data and uses the placeholder instead.
79 public boolean indexImageRecognition = true;
// Database connection created in the constructor.
80 private final DatabaseConnection conn;
// Pending requests; created in the constructor with capacity MAX_QUEUED_WAITING.
81 private volatile LinkedBlockingQueue<String> workQueue;
82 private static int MAX_QUEUED_WAITING = 1000;
// NOTE(review): not read in any visible (non-commented) line; presumably polled
// to trigger processInterrupt() -- confirm.
83 public boolean interrupt;
// When true (and Global.keepRunning is set), indexResource() also calls indexResourceContent().
85 public boolean indexAttachmentsLocally = true;
// Signal object whose indexStarted / indexFinished are emitted by scanUnindexed().
86 public volatile IndexSignal signal;
// Words addToIndex() has already seen; used to skip repeats.
87 private final TreeSet<String> foundWords;
// Compared against 100 to decide when to commit and reset to 0 after commits.
// NOTE(review): the increment is not visible in this listing.
88 int uncommittedCount = 0;
// Creates the word set, logger, database connection, DOM document and the
// bounded work queue.
// logname is passed to ApplicationLogger; u, i, r, b, uid, pswd and cpswd are
// forwarded unchanged to DatabaseConnection.
// NOTE(review): presumably these are database URLs and credentials, and 500 is
// a DatabaseConnection tuning value -- confirm against DatabaseConnection.
90 // ICHANGED: added the String b parameter
91 public IndexRunner(String logname, String u, String i, String r, String b, String uid, String pswd, String cpswd) {
92 foundWords = new TreeSet<String>();
93 logger = new ApplicationLogger(logname);
95 conn = new DatabaseConnection(logger, u, i, r, b, uid, pswd, cpswd, 500);
99 doc = new QDomDocument();
// Queue capacity is MAX_QUEUED_WAITING entries.
100 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);
// Setter header for the index type; its body is not visible in this listing.
103 public void setIndexType(int t) {
// NOTE(review): the listing's own numbering jumps from 103 to 110 here. The
// statements below read like the body of the Runnable entry point, whose
// header is not visible -- confirm against the full file.
// Lower this thread's priority and create the signal objects before entering the work loop.
110 thread().setPriority(Thread.MIN_PRIORITY);
111 noteSignal = new NoteSignal();
112 resourceSignal = new NoteResourceSignal();
113 signal = new IndexSignal();
114 logger.log(logger.EXTREME, "Starting index thread ");
// Main loop: commit outstanding work, then block until a request string arrives.
115 while (keepRunning) {
118 conn.commitTransaction();
119 uncommittedCount = 0;
// take() blocks while the queue is empty and can throw InterruptedException (caught below).
120 String work = workQueue.take();
// Requests are recognised by prefix: SCAN, REINDEXALL, REINDEXNOTE, STOP.
// The statements run for SCAN and STOP are not visible in this listing.
122 if (work.startsWith("SCAN")) {
127 if (work.startsWith("REINDEXALL")) {
129 indexType=REINDEXALL;
131 if (work.startsWith("REINDEXNOTE")) {
// Strip the command prefix (including its trailing space) from the request.
// NOTE(review): presumably the remainder is the note guid -- confirm.
132 work = work.replace("REINDEXNOTE ", "");
134 indexType = REINDEXNOTE;
136 if (work.startsWith("STOP")) {
140 logger.log(logger.EXTREME, "Type:" +indexType);
// Dispatch on the selected type; each branch also requires keepRunning.
// The calls made inside the branches are not visible in this listing.
141 if (indexType == SCAN && keepRunning) {
142 logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");
146 if (indexType == REINDEXALL && keepRunning) {
147 logger.log(logger.MEDIUM, "Marking all for reindex");
151 if (indexType == REINDEXNOTE && keepRunning) {
// An interrupted take() is only logged in the visible lines.
// NOTE(review): no Thread.currentThread().interrupt() is visible -- confirm
// whether the interrupt status should be restored.
154 } catch (InterruptedException e) {
155 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());
// The database shutdown call between these two log lines is not visible in this listing.
158 logger.log(logger.EXTREME, "Shutting down database");
160 logger.log(logger.EXTREME, "Database shut down. Exiting thread");
164 // public void indexNoteContent() {
165 // foundWords.clear();
167 // logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
169 // logger.log(logger.EXTREME, "Getting note content");
170 // Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
172 // if (indexNoteBody) {
173 // data = n.getContent();
174 // data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
176 // logger.log(logger.EXTREME, "Removing any encrypted data");
177 // data = removeEnCrypt(data.toString());
178 // logger.log(logger.EXTREME, "Removing xml markups");
182 // if (indexNoteTitle)
183 // text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
185 // text = removeTags(StringEscapeUtils.unescapeHtml4(data));
187 // logger.log(logger.EXTREME, "Splitting words");
188 // String[] result = text.toString().split(regex);
189 // conn.commitTransaction();
190 // conn.beginTransaction();
191 // logger.log(logger.EXTREME, "Deleting existing words for note from index");
192 // conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
194 // logger.log(logger.EXTREME, "Number of words found: " +result.length);
195 // for (int j=0; j<result.length && keepRunning; j++) {
197 // processInterrupt();
199 // if (!result[j].trim().equals("")) {
200 // logger.log(logger.EXTREME, "Result word: " +result[j].trim());
201 // addToIndex(guid, result[j], "CONTENT");
206 // for (int j=0; j<n.getTagNamesSize(); j++) {
207 // if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
208 // addToIndex(guid, n.getTagNames().get(j), "CONTENT");
211 // // If we were interrupted, we will reindex this note next time
212 // if (Global.keepRunning) {
213 // logger.log(logger.EXTREME, "Resetting note guid needed");
214 // conn.getNoteTable().setIndexNeeded(guid, false);
216 // conn.commitTransaction();
217 // uncommittedCount = 0;
218 // logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
// Returns a copy of text with characters deleted while walking the buffer from
// its last character to its first; '<' characters are always deleted, and so
// is any character visited while inTag is true.
// NOTE(review): the statements controlled by the first two ifs are not visible
// in this listing; presumably they set and clear inTag -- confirm.
222 private String removeTags(String text) {
223 StringBuffer buffer = new StringBuffer(text);
224 boolean inTag = false;
// Iterating backwards keeps the remaining indexes valid while characters are deleted.
225 for (int i=buffer.length()-1; i>=0; i--) {
226 if (buffer.charAt(i) == '>')
228 if (buffer.charAt(i) == '<')
230 if (inTag || buffer.charAt(i) == '<')
231 buffer.deleteCharAt(i);
234 return buffer.toString();
// Offers a request string to the work queue, but only when the queue is
// currently empty.
// NOTE(review): the return statements are not visible in this listing, so the
// meaning of the boolean result cannot be stated from here.
238 public synchronized boolean addWork(String request) {
239 if (workQueue.size() == 0) {
240 workQueue.offer(request);
// Returns the number of requests currently waiting in the work queue.
246 public synchronized int getWorkQueueSize() {
247 return workQueue.size();
// Re-indexes the resource identified by `guid`: removes its existing
// "RESOURCE" words, adds the words found in <t> elements of its recognition
// data, optionally indexes the attachment content, and finally clears the
// resource's index-needed flag.
// NOTE(review): several lines of this method are not visible in this listing.
250 public void indexResource() {
255 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);
// With recognition indexing disabled, or no recognition body, use a one-space placeholder.
256 if (!indexImageRecognition ||
257 r == null || r.getRecognition() == null ||
258 r.getRecognition().getBody() == null ||
259 r.getRecognition().getBody().length == 0)
260 resourceBinary = new QByteArray(" ");
// NOTE(review): the `else` that presumably introduces the next statement is not visible.
262 resourceBinary = new QByteArray(r.getRecognition().getBody());
264 conn.commitTransaction();
265 conn.beginTransaction();
// NOTE(review): the condition above allows r == null, yet r is dereferenced
// here; unless a line missing from this listing guards it, this would throw
// NullPointerException -- confirm.
266 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");
267 // This is due to an old bug & can be removed at some point in the future 11/23/2010
268 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");
269 conn.commitTransaction();
270 uncommittedCount = 0;
271 conn.beginTransaction();
// Load the recognition data into the DOM document.
273 doc.setContent(resourceBinary);
274 QDomElement docElem = doc.documentElement();
276 // look for text tags
277 QDomNodeList anchors = docElem.elementsByTagName("t");
278 for (int i=0; i<anchors.length() && keepRunning; i++) {
284 QDomElement enmedia = anchors.at(i).toElement();
// The "w" attribute supplies the weight stored with the word.
285 String weight = new String(enmedia.attribute("w"));
286 String text = new String(enmedia.text()).toLowerCase();
287 if (!text.equals("")) {
// NOTE(review): new Integer(weight) throws NumberFormatException when "w" is
// missing or not numeric; no handler is visible -- confirm.
288 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));
// Commit once more than 100 additions are pending.
290 if (uncommittedCount > 100) {
291 conn.commitTransaction();
// NOTE(review): this reads Global.keepRunning while the loops above read the
// instance field keepRunning -- confirm the difference is intended.
297 if (Global.keepRunning && indexAttachmentsLocally) {
298 conn.commitTransaction();
299 uncommittedCount = 0;
300 conn.beginTransaction();
301 indexResourceContent(guid);
// Clear the index-needed flag only when Global.keepRunning is still set.
304 if (Global.keepRunning)
305 conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);
306 conn.commitTransaction();
307 uncommittedCount = 0;
// Loads the resource for guid and selects an indexer by comparing its MIME
// type (case-insensitively) with fixed strings. Resources that are null or
// have no MIME type are skipped.
// NOTE(review): only the OOXML and Office calls are visible in this listing;
// the PDF, RTF and ODF branches presumably call indexResourcePDF /
// indexResourceRTF / indexResourceODF -- confirm.
310 private void indexResourceContent(String guid) {
311 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);
312 if (r != null && r.getMime() != null) {
313 if (r.getMime().equalsIgnoreCase("application/pdf")) {
// docx / xlsx / pptx go to the OOXML indexer.
317 if (r.getMime().equalsIgnoreCase("application/docx") ||
318 r.getMime().equalsIgnoreCase("application/xlsx") ||
319 r.getMime().equalsIgnoreCase("application/pptx")) {
320 indexResourceOOXML(r);
// vsd / ppt / xls / msg / doc go to the Office indexer.
323 if (r.getMime().equalsIgnoreCase("application/vsd") ||
324 r.getMime().equalsIgnoreCase("application/ppt") ||
325 r.getMime().equalsIgnoreCase("application/xls") ||
326 r.getMime().equalsIgnoreCase("application/msg") ||
327 r.getMime().equalsIgnoreCase("application/doc")) {
328 indexResourceOffice(r);
331 if (r.getMime().equalsIgnoreCase("application/rtf")) {
// odf / odt / odp / odg / odb / ods share one branch.
335 if (r.getMime().equalsIgnoreCase("application/odf") ||
336 r.getMime().equalsIgnoreCase("application/odt") ||
337 r.getMime().equalsIgnoreCase("application/odp") ||
338 r.getMime().equalsIgnoreCase("application/odg") ||
339 r.getMime().equalsIgnoreCase("application/odb") ||
340 r.getMime().equalsIgnoreCase("application/ods")) {
// Extracts text from a resource with Tika's RTFParser and passes each word to
// addToIndex() under the owning note's guid with type "RESOURCE".
// NOTE(review): the declaration of `input`, the `try`, the loop bodies and the
// closing braces are not visible in this listing; no close() of `input` is
// visible either -- confirm the stream and temporary file are released.
348 private void indexResourceRTF(Resource r) {
350 Data d = r.getData();
// Repeats up to 20 times while the data reports size 0; the loop body is not visible.
351 for (int i=0; i<20 && d.getSize() == 0; i++)
// Write the resource body to a temporary file so it can be read back as a stream.
356 QTemporaryFile f = writeResource(d);
363 input = new FileInputStream(new File(f.fileName()));
// NOTE(review): -1 presumably lifts BodyContentHandler's output size limit -- confirm in the Tika docs.
364 ContentHandler textHandler = new BodyContentHandler(-1);
365 Metadata metadata = new Metadata();
366 RTFParser parser = new RTFParser();
367 ParseContext context = new ParseContext();
368 parser.parse(input, textHandler, metadata, context);
// Split the extracted text into words; the loop stops early once keepRunning is false.
369 String[] result = textHandler.toString().split(regex);
370 for (int i=0; i<result.length && keepRunning; i++) {
371 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
// Every failure below is logged at LOW level; no rethrow is visible.
376 } catch (java.lang.ClassCastException e) {
377 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
378 } catch (FileNotFoundException e) {
379 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
380 } catch (IOException e) {
381 logger.log(logger.LOW, "IO exception: " +e.getMessage());
382 } catch (SAXException e) {
383 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
384 } catch (TikaException e) {
385 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
386 } catch (Exception e) {
387 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
388 } catch (java.lang.NoSuchMethodError e) {
389 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
// The catch header for this last handler is not visible in this listing.
391 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
// Extracts text from a resource with Tika's OpenDocumentParser and passes each
// word to addToIndex() under the owning note's guid with type "RESOURCE".
// NOTE(review): the declaration of `input`, the `try`, the loop bodies and the
// closing braces are not visible in this listing; no close() of `input` is
// visible either -- confirm the stream and temporary file are released.
396 private void indexResourceODF(Resource r) {
398 Data d = r.getData();
// Repeats up to 20 times while the data reports size 0; the loop body is not visible.
399 for (int i=0; i<20 && d.getSize() == 0; i++)
// Write the resource body to a temporary file so it can be read back as a stream.
403 QTemporaryFile f = writeResource(d);
410 input = new FileInputStream(new File(f.fileName()));
// NOTE(review): -1 presumably lifts BodyContentHandler's output size limit -- confirm in the Tika docs.
411 ContentHandler textHandler = new BodyContentHandler(-1);
412 Metadata metadata = new Metadata();
413 OpenDocumentParser parser = new OpenDocumentParser();
414 ParseContext context = new ParseContext();
415 parser.parse(input, textHandler, metadata, context);
// Split the extracted text into words; the loop stops early once keepRunning is false.
416 String[] result = textHandler.toString().split(regex);
417 for (int i=0; i<result.length && keepRunning; i++) {
421 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
// Every failure below is logged at LOW level; no rethrow is visible.
426 } catch (java.lang.ClassCastException e) {
427 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
428 } catch (FileNotFoundException e) {
429 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
430 } catch (IOException e) {
431 logger.log(logger.LOW, "IO exception: " +e.getMessage());
432 } catch (SAXException e) {
433 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
434 } catch (TikaException e) {
435 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
436 } catch (Exception e) {
437 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
438 } catch (java.lang.NoSuchMethodError e) {
439 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
// The catch header for this last handler is not visible in this listing.
441 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
// Extracts text from a resource with Tika's OfficeParser and passes each word
// to addToIndex() under the owning note's guid with type "RESOURCE".
// NOTE(review): the declaration of `input`, the `try`, the loop bodies and the
// closing braces are not visible in this listing; no close() of `input` is
// visible either -- confirm the stream and temporary file are released.
446 private void indexResourceOffice(Resource r) {
448 Data d = r.getData();
// Repeats up to 20 times while the data reports size 0; the loop body is not visible.
449 for (int i=0; i<20 && d.getSize() == 0; i++)
// Write the resource body to a temporary file so it can be read back as a stream.
453 QTemporaryFile f = writeResource(d);
460 input = new FileInputStream(new File(f.fileName()));
// NOTE(review): -1 presumably lifts BodyContentHandler's output size limit -- confirm in the Tika docs.
461 ContentHandler textHandler = new BodyContentHandler(-1);
462 Metadata metadata = new Metadata();
463 OfficeParser parser = new OfficeParser();
464 ParseContext context = new ParseContext();
465 parser.parse(input, textHandler, metadata, context);
// Split the extracted text into words; the loop stops early once keepRunning is false.
466 String[] result = textHandler.toString().split(regex);
467 for (int i=0; i<result.length && keepRunning; i++) {
471 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
// Every failure below is logged at LOW level; no rethrow is visible.
476 } catch (java.lang.ClassCastException e) {
477 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
478 } catch (FileNotFoundException e) {
479 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
480 } catch (IOException e) {
481 logger.log(logger.LOW, "IO exception: " +e.getMessage());
482 } catch (SAXException e) {
483 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
484 } catch (TikaException e) {
485 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
486 } catch (Exception e) {
487 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
488 } catch (java.lang.NoSuchMethodError e) {
489 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
// The catch header for this last handler is not visible in this listing.
491 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
// Extracts text from a resource with Tika's PDFParser and passes each word to
// addToIndex() under the owning note's guid with type "RESOURCE".
// NOTE(review): the declaration of `input`, the `try`, the loop bodies and the
// closing braces are not visible in this listing; no close() of `input` is
// visible either -- confirm the stream and temporary file are released.
497 private void indexResourcePDF(Resource r) {
499 Data d = r.getData();
// Repeats up to 20 times while the data reports size 0; the loop body is not visible.
500 for (int i=0; i<20 && d.getSize() == 0; i++)
// Write the resource body to a temporary file so it can be read back as a stream.
504 QTemporaryFile f = writeResource(d);
511 input = new FileInputStream(new File(f.fileName()));
// NOTE(review): -1 presumably lifts BodyContentHandler's output size limit -- confirm in the Tika docs.
512 ContentHandler textHandler = new BodyContentHandler(-1);
513 Metadata metadata = new Metadata();
514 PDFParser parser = new PDFParser();
515 ParseContext context = new ParseContext();
516 parser.parse(input, textHandler, metadata, context);
// Split the extracted text into words; the loop stops early once keepRunning is false.
517 String[] result = textHandler.toString().split(regex);
518 for (int i=0; i<result.length && keepRunning; i++) {
522 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
// Every failure below is logged at LOW level; no rethrow is visible.
527 } catch (java.lang.ClassCastException e) {
528 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
529 } catch (FileNotFoundException e) {
530 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
531 } catch (IOException e) {
532 logger.log(logger.LOW, "IO exception: " +e.getMessage());
533 } catch (SAXException e) {
534 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
535 } catch (TikaException e) {
536 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
537 } catch (Exception e) {
538 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
539 } catch (java.lang.NoSuchMethodError e) {
540 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
// The catch header for this last handler is not visible in this listing.
542 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
// Extracts text from a resource with Tika's OOXMLParser and passes each word
// to addToIndex() under the owning note's guid with type "RESOURCE".
// NOTE(review): the declaration of `input`, the `try`, the loop bodies and the
// closing braces are not visible in this listing; no close() of `input` is
// visible either -- confirm the stream and temporary file are released.
547 private void indexResourceOOXML(Resource r) {
549 Data d = r.getData();
// Repeats up to 20 times while the data reports size 0; the loop body is not visible.
550 for (int i=0; i<20 && d.getSize() == 0; i++)
// Write the resource body to a temporary file so it can be read back as a stream.
554 QTemporaryFile f = writeResource(d);
561 input = new FileInputStream(new File(f.fileName()));
// NOTE(review): -1 presumably lifts BodyContentHandler's output size limit -- confirm in the Tika docs.
562 ContentHandler textHandler = new BodyContentHandler(-1);
563 Metadata metadata = new Metadata();
564 OOXMLParser parser = new OOXMLParser();
565 ParseContext context = new ParseContext();
566 parser.parse(input, textHandler, metadata, context);
// Split the extracted text into words; the loop stops early once keepRunning is false.
567 String[] result = textHandler.toString().split(regex);
568 for (int i=0; i<result.length && keepRunning; i++) {
572 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
// Every failure below is logged at LOW level; no rethrow is visible.
577 } catch (java.lang.ClassCastException e) {
578 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
579 } catch (FileNotFoundException e) {
580 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
581 } catch (IOException e) {
582 logger.log(logger.LOW, "IO exception: " +e.getMessage());
583 } catch (SAXException e) {
584 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
585 } catch (TikaException e) {
586 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
587 } catch (Exception e) {
588 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
589 } catch (java.lang.NoSuchMethodError e) {
590 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
// The catch header for this last handler is not visible in this listing.
592 logger.log(logger.LOW, "Unknown error: " +e.getMessage()); }
// Creates a QTemporaryFile, opens it write-only and writes the body of d into it.
// NOTE(review): the lines after the write (presumably a close and the return of
// newFile) are not visible in this listing -- confirm.
597 private QTemporaryFile writeResource(Data d) {
598 QTemporaryFile newFile = new QTemporaryFile();
599 newFile.open(OpenModeFlag.WriteOnly);
600 newFile.write(d.getBody());
// Repeatedly cuts out the text from a "<en-crypt" opening tag up to and
// including the following "</en-crypt>" closing tag, while keepRunning is true.
// NOTE(review): the declaration of endPos, the branch that clears tagFound and
// the return statement are not visible in this listing.
606 private String removeEnCrypt(String content) {
607 int index = content.indexOf("<en-crypt");
609 boolean tagFound = true;
610 while (tagFound && keepRunning) {
// 11 is the length of "</en-crypt>", so endPos points just past the closing tag.
// NOTE(review): because of the +11, endPos is at least 10 even when indexOf
// returns -1, so the `endPos > -1` test below is always true; only
// `index > -1` actually guards the cut. If an opening tag had no closing tag,
// substring(endPos) would start at offset 10 -- confirm whether that can occur.
614 endPos = content.indexOf("</en-crypt>", index)+11;
615 if (endPos > -1 && index > -1) {
616 content = content.substring(0,index)+content.substring(endPos);
// Look for the next opening tag in the shortened content.
617 index = content.indexOf("<en-crypt");
// Cleans one word and, if it has not been seen before, stores it in the word
// index for guid with the given type and a fixed weight of 100.
// NOTE(review): the early return after the first check, the remaining loop
// bodies and the closing braces are not visible in this listing. foundWords is
// not cleared in any visible active code and is not keyed by guid, so a word
// recorded for one note would be skipped for later notes -- confirm.
626 private void addToIndex(String guid, String word, String type) {
// Skip raw words that were already handled.
627 if (foundWords.contains(word))
629 StringBuffer buffer = new StringBuffer(word.toLowerCase());
// First pass, from the last character backwards: delete characters that are
// neither letters/digits nor listed in specialIndexCharacters.
630 for (int i=buffer.length()-1; i>=0; i--) {
631 if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
632 buffer.deleteCharAt(i);
// Reverse so the second pass starts at the original beginning of the word; that
// pass does not consult specialIndexCharacters.
636 buffer = buffer.reverse();
637 for (int i=buffer.length()-1; i>=0; i--) {
638 if (!Character.isLetterOrDigit(buffer.charAt(i)))
639 buffer.deleteCharAt(i);
// Restore the original character order.
643 buffer = buffer.reverse();
644 if (buffer.length() > 0) {
645 // We have a good word, now let's trim off junk at the beginning or end
646 if (!foundWords.contains(buffer.toString())) {
// Remember both the cleaned and the raw form so later duplicates are skipped.
647 foundWords.add(buffer.toString());
648 foundWords.add(word);
649 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
// Commit once more than 100 additions are pending.
651 if (uncommittedCount > 100) {
652 conn.commitTransaction();
// Processes the resources reported as unindexed, then removes index entries
// whose guid no longer exists in the note table. Emits indexStarted /
// indexFinished around the work.
// NOTE(review): the per-resource loop body beyond the guid assignment, and the
// statement that sets `started`, are not visible in this listing.
660 private void scanUnindexed() {
661 // List<String> notes = conn.getNoteTable().getUnindexed();
663 boolean started = false;
664 // if (notes.size() > 0) {
665 // signal.indexStarted.emit();
668 // for (int i=0; i<notes.size() && keepRunning; i++) {
670 // processInterrupt();
672 // guid = notes.get(i);
673 // if (guid != null && keepRunning) {
674 // indexNoteContent();
678 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();
// Announce the start only when there is something to index.
679 if (unindexedResources.size() > 0 && !started) {
680 signal.indexStarted.emit();
683 for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {
// Select the resource to work on by storing its guid in `guid`.
687 guid = unindexedResources.get(i);
693 // Cleanup stuff that was deleted at some point
694 List<String> guids = conn.getWordsTable().getGuidList();
695 logger.log(logger.LOW, "GUIDS in index: " +guids.size());
696 for (int i=0; i<guids.size() && keepRunning; i++) {
// Expunge index entries for guids the note table no longer knows.
697 if (!conn.getNoteTable().exists(guids.get(i))) {
698 logger.log(logger.LOW, "Old GUID found: " +guids.get(i));
699 conn.getWordsTable().expunge(guids.get(i));
// The finish signal is sent only if a start was recorded and keepRunning is still true.
703 if (started && keepRunning)
704 signal.indexFinished.emit();
// Flags the note identified by `guid` as needing indexing.
// NOTE(review): other lines of this method are not visible in this listing.
707 private void reindexNote() {
710 conn.getNoteTable().setIndexNeeded(guid, true);
// Asks the note table and the note resource table to mark everything for reindexing.
713 private void reindexAll() {
714 conn.getNoteTable().reindexAllNotes();
715 conn.getNoteTable().noteResourceTable.reindexAll();
// Parks the current thread until roughly 1000 ms after the call.
// NOTE(review): `len` is never read in the visible lines, so the wait is always
// about one second regardless of the argument -- confirm whether
// `len * 1000` was intended.
718 private void waitSeconds(int len) {
719 long starttime = 0; // variable declared
721 // for the first time, remember the timestamp
722 starttime = System.currentTimeMillis();
723 // the next timestamp we want to wake up
// The double literal is narrowed back to long by the compound assignment.
724 starttime += (1000.0);
725 // Wait until the desired next time arrives using nanosecond
726 // accuracy timer (wait(time) isn't accurate enough on most platforms)
// Remaining milliseconds (never negative) converted to nanoseconds.
727 LockSupport.parkNanos((Math.max(0,
728 starttime - System.currentTimeMillis()) * 1000000));
// Commits the current transaction, resets the pending counter and opens a new
// transaction.
// NOTE(review): one line between the commit and the counter reset is not
// visible in this listing; presumably it waits or yields to the interrupting
// caller -- confirm.
731 private void processInterrupt() {
732 conn.commitTransaction();
734 uncommittedCount = 0;
735 conn.beginTransaction();