2 * This file is part of NixNote/NeighborNote
3 * Copyright 2009 Randy Baumgarte
4 * Copyright 2013 Yuki Takahashi
6 * This file may be licensed under the terms of the
7 * GNU General Public License Version 2 (the ``GPL'').
9 * Software distributed under the License is distributed
10 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
11 * express or implied. See the GPL for the specific language
12 * governing rights and limitations.
14 * You should have received a copy of the GPL along with this
15 * program. If not, go to http://www.gnu.org/licenses/gpl.html
16 * or write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 package cx.fbn.nevernote.threads;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.util.List;
29 import java.util.TreeSet;
30 import java.util.concurrent.LinkedBlockingQueue;
31 import java.util.concurrent.locks.LockSupport;
33 import org.apache.commons.lang3.StringEscapeUtils;
34 import org.apache.tika.exception.TikaException;
35 import org.apache.tika.metadata.Metadata;
36 import org.apache.tika.parser.ParseContext;
37 import org.apache.tika.parser.microsoft.OfficeParser;
38 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
39 import org.apache.tika.parser.odf.OpenDocumentParser;
40 import org.apache.tika.parser.pdf.PDFParser;
41 import org.apache.tika.parser.rtf.RTFParser;
42 import org.apache.tika.sax.BodyContentHandler;
43 import org.xml.sax.ContentHandler;
44 import org.xml.sax.SAXException;
46 import com.evernote.edam.type.Data;
47 import com.evernote.edam.type.Note;
48 import com.evernote.edam.type.Resource;
49 import com.trolltech.qt.core.QByteArray;
50 import com.trolltech.qt.core.QIODevice.OpenModeFlag;
51 import com.trolltech.qt.core.QObject;
52 import com.trolltech.qt.core.QTemporaryFile;
53 import com.trolltech.qt.xml.QDomDocument;
54 import com.trolltech.qt.xml.QDomElement;
55 import com.trolltech.qt.xml.QDomNodeList;
57 import cx.fbn.nevernote.Global;
58 import cx.fbn.nevernote.signals.IndexSignal;
59 import cx.fbn.nevernote.signals.NoteResourceSignal;
60 import cx.fbn.nevernote.signals.NoteSignal;
61 import cx.fbn.nevernote.sql.DatabaseConnection;
62 import cx.fbn.nevernote.utilities.ApplicationLogger;
64 public class IndexRunner extends QObject implements Runnable {
// Log sink for this thread; created in the constructor from the supplied log name.
66 private final ApplicationLogger logger;
// Recognition body of the resource currently being indexed; parsed into 'doc' by indexResource().
68 private QByteArray resourceBinary;
// Signal objects; instantiated when the index thread starts running.
69 public volatile NoteSignal noteSignal;
70 public volatile NoteResourceSignal resourceSignal;
// Kind of work currently selected; compared against SCAN / REINDEXALL / REINDEXNOTE.
71 private int indexType;
// Values for indexType. NOTE(review): these are instance fields, not static constants.
72 public final int SCAN=1;
73 public final int REINDEXALL=2;
74 public final int REINDEXNOTE=3;
// Guard for the main loop and for most of the indexing loops in this class.
75 public boolean keepRunning;
// Scratch DOM document used to parse a resource's recognition XML.
76 private final QDomDocument doc;
// Word-splitting pattern obtained from Global; passed to String.split().
77 private static String regex = Global.getWordRegex();
// Non-alphanumeric characters that addToIndex() exempts from its first stripping pass.
78 public String specialIndexCharacters = "";
// Presumably select whether note body / title text is indexed - TODO confirm against indexNoteContent().
79 public boolean indexNoteBody = true;
80 public boolean indexNoteTitle = true;
// When false, indexResource() ignores any recognition data and indexes a blank document instead.
81 public boolean indexImageRecognition = true;
// Database connection owned by this thread; created in the constructor.
82 private final DatabaseConnection conn;
// Pending work requests (strings such as "SCAN", "REINDEXALL", "REINDEXNOTE ...", "STOP").
83 private volatile LinkedBlockingQueue<String> workQueue;
// Capacity given to workQueue when it is created.
84 private static int MAX_QUEUED_WAITING = 1000;
// NOTE(review): presumably set by another thread to request a commit via processInterrupt() - confirm.
85 public boolean interrupt;
// When true, indexResource() also indexes the attachment's own content via indexResourceContent().
87 public boolean indexAttachmentsLocally = true;
// Emits indexStarted / indexFinished around a scan; instantiated when the thread starts.
88 public volatile IndexSignal signal;
// Words already handled by addToIndex(); used there to skip repeats.
89 private final TreeSet<String> foundWords;
// Number of index rows added since the last commit; a commit is issued once it exceeds 100.
90 int uncommittedCount = 0;
92 // ICHANGED: added the String b parameter
/**
 * Creates the index runner together with its own logger, database connection,
 * scratch DOM document and bounded work queue.
 *
 * @param logname name handed to the ApplicationLogger created for this thread
 * @param u       forwarded unchanged to DatabaseConnection (presumably a database URL - confirm)
 * @param i       forwarded unchanged to DatabaseConnection (presumably a database URL - confirm)
 * @param r       forwarded unchanged to DatabaseConnection (presumably a database URL - confirm)
 * @param b       forwarded unchanged to DatabaseConnection (presumably a database URL - confirm)
 * @param uid     forwarded unchanged to DatabaseConnection (presumably the user id - confirm)
 * @param pswd    forwarded unchanged to DatabaseConnection (presumably the password - confirm)
 * @param cpswd   forwarded unchanged to DatabaseConnection (presumably the cipher password - confirm)
 */
93 public IndexRunner(String logname, String u, String i, String r, String b, String uid, String pswd, String cpswd) {
94 foundWords = new TreeSet<String>();
95 logger = new ApplicationLogger(logname);
// The trailing 500 is passed straight to DatabaseConnection; its meaning is defined there.
97 conn = new DatabaseConnection(logger, u, i, r, b, uid, pswd, cpswd, 500);
101 doc = new QDomDocument();
// Bounded queue: at most MAX_QUEUED_WAITING requests can be pending.
102 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);
/**
 * Selects the kind of indexing work to perform.
 *
 * @param t presumably one of SCAN, REINDEXALL or REINDEXNOTE - TODO confirm against the method body
 */
105 public void setIndexType(int t) {
// Main loop of the index thread: lowers the thread priority, creates the signal
// objects, then repeatedly takes a request string from workQueue and dispatches on it.
112 thread().setPriority(Thread.MIN_PRIORITY);
113 noteSignal = new NoteSignal();
114 resourceSignal = new NoteResourceSignal();
115 signal = new IndexSignal();
116 logger.log(logger.EXTREME, "Starting index thread ");
117 while (keepRunning) {
// Flush pending index writes before waiting for the next request.
120 conn.commitTransaction();
121 uncommittedCount = 0;
// take() blocks until a request is available.
122 String work = workQueue.take();
// Requests are recognised by prefix.
124 if (work.startsWith("SCAN")) {
129 if (work.startsWith("REINDEXALL")) {
131 indexType=REINDEXALL;
133 if (work.startsWith("REINDEXNOTE")) {
// Strip the command word; what remains is presumably the GUID of the note to reindex - confirm.
134 work = work.replace("REINDEXNOTE ", "");
136 indexType = REINDEXNOTE;
138 if (work.startsWith("STOP")) {
142 logger.log(logger.EXTREME, "Type:" +indexType);
// Each action is skipped if keepRunning was cleared in the meantime.
143 if (indexType == SCAN && keepRunning) {
144 logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");
148 if (indexType == REINDEXALL && keepRunning) {
149 logger.log(logger.MEDIUM, "Marking all for reindex");
153 if (indexType == REINDEXNOTE && keepRunning) {
// NOTE(review): the interruption is only logged here; the thread's interrupt status is not restored.
156 } catch (InterruptedException e) {
157 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());
160 logger.log(logger.EXTREME, "Shutting down database");
162 logger.log(logger.EXTREME, "Database shut down. Exiting thread");
/**
 * Rebuilds the "CONTENT" word index entries for the note identified by the
 * current guid: loads the note, strips encrypted sections and markup, splits
 * the text into words, replaces the note's existing index rows, then indexes
 * the tag names as well.
 */
166 public void indexNoteContent() {
169 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
171 logger.log(logger.EXTREME, "Getting note content");
172 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
// NOTE(review): as shown, the second assignment overwrites the first; there may be a
// condition between them that is not visible here - confirm.
175 data = n.getContent();
176 data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
178 logger.log(logger.EXTREME, "Removing any encrypted data");
179 data = removeEnCrypt(data.toString());
180 logger.log(logger.EXTREME, "Removing xml markups");
// Two variants: body text followed by the title, or body text alone.
// HTML entities are unescaped before the tags are removed.
185 text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
187 text = removeTags(StringEscapeUtils.unescapeHtml4(data));
189 logger.log(logger.EXTREME, "Splitting words");
190 String[] result = text.toString().split(regex);
// Close any open transaction and start a fresh one for the delete-and-reinsert.
191 conn.commitTransaction();
192 conn.beginTransaction();
193 logger.log(logger.EXTREME, "Deleting existing words for note from index");
194 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
196 logger.log(logger.EXTREME, "Number of words found: " +result.length);
197 for (int j=0; j<result.length && keepRunning; j++) {
// Skip tokens that are empty after trimming.
201 if (!result[j].trim().equals("")) {
202 logger.log(logger.EXTREME, "Result word: " +result[j].trim());
203 addToIndex(guid, result[j], "CONTENT");
// Tag names are indexed under the same "CONTENT" type as the body words.
208 for (int j=0; j<n.getTagNamesSize(); j++) {
209 if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
210 addToIndex(guid, n.getTagNames().get(j), "CONTENT");
213 // If we were interrupted, we will reindex this note next time
// NOTE(review): this tests Global.keepRunning, whereas the loops above test this object's keepRunning field.
214 if (Global.keepRunning) {
215 logger.log(logger.EXTREME, "Resetting note guid needed");
216 conn.getNoteTable().setIndexNeeded(guid, false);
218 conn.commitTransaction();
219 uncommittedCount = 0;
220 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
224 private String removeTags(String text) {
225 StringBuffer buffer = new StringBuffer(text);
226 boolean inTag = false;
227 for (int i=buffer.length()-1; i>=0; i--) {
228 if (buffer.charAt(i) == '>')
230 if (buffer.charAt(i) == '<')
232 if (inTag || buffer.charAt(i) == '<')
233 buffer.deleteCharAt(i);
236 return buffer.toString();
240 public synchronized boolean addWork(String request) {
241 if (workQueue.size() == 0) {
242 workQueue.offer(request);
/**
 * Reports how many work requests are currently waiting in the queue.
 *
 * @return the current size of workQueue
 */
248 public synchronized int getWorkQueueSize() {
249 return workQueue.size();
/**
 * Rebuilds the "RESOURCE" word index entries for the resource identified by the
 * current guid. Words come from the resource's recognition XML (its "t"
 * elements) and, when indexAttachmentsLocally is set, from the attachment's own
 * content via indexResourceContent().
 */
252 public void indexResource() {
257 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);
// Use a single blank as the document when recognition indexing is disabled or
// there is no recognition data; otherwise use the recognition body.
258 if (!indexImageRecognition ||
259 r == null || r.getRecognition() == null ||
260 r.getRecognition().getBody() == null ||
261 r.getRecognition().getBody().length == 0)
262 resourceBinary = new QByteArray(" ");
264 resourceBinary = new QByteArray(r.getRecognition().getBody());
266 conn.commitTransaction();
267 conn.beginTransaction();
// NOTE(review): r is allowed to be null by the test above but is dereferenced here;
// unless a guard exists that is not visible, this can throw NullPointerException - confirm.
268 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");
269 // This is due to an old bug & can be removed at some point in the future 11/23/2010
270 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");
271 conn.commitTransaction();
272 uncommittedCount = 0;
273 conn.beginTransaction();
275 doc.setContent(resourceBinary);
276 QDomElement docElem = doc.documentElement();
278 // look for text tags
279 QDomNodeList anchors = docElem.elementsByTagName("t");
280 for (int i=0; i<anchors.length() && keepRunning; i++) {
286 QDomElement enmedia = anchors.at(i).toElement();
// The "w" attribute supplies the weight stored with the word.
287 String weight = new String(enmedia.attribute("w"));
288 String text = new String(enmedia.text()).toLowerCase();
289 if (!text.equals("")) {
// NOTE(review): new Integer(weight) throws NumberFormatException if "w" is missing or not numeric.
290 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));
// Commit in batches once more than 100 rows are pending.
292 if (uncommittedCount > 100) {
293 conn.commitTransaction();
// Optionally index the attachment's own content as well.
299 if (Global.keepRunning && indexAttachmentsLocally) {
300 conn.commitTransaction();
301 uncommittedCount = 0;
302 conn.beginTransaction();
303 indexResourceContent(guid);
// Clear the index-needed flag only if the application is still running.
306 if (Global.keepRunning)
307 conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);
308 conn.commitTransaction();
309 uncommittedCount = 0;
/**
 * Loads the resource with the given guid and chooses a type-specific indexer
 * by comparing its MIME string, ignoring case, against fixed values.
 * Resources that are missing or have no MIME string are ignored.
 *
 * NOTE(review): the strings tested are of the form "application/" + file
 * extension; confirm that stored MIME values really use this form.
 *
 * @param guid GUID of the resource to index
 */
312 private void indexResourceContent(String guid) {
313 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);
314 if (r != null && r.getMime() != null) {
// PDF
315 if (r.getMime().equalsIgnoreCase("application/pdf")) {
// Office Open XML formats
319 if (r.getMime().equalsIgnoreCase("application/docx") ||
320 r.getMime().equalsIgnoreCase("application/xlsx") ||
321 r.getMime().equalsIgnoreCase("application/pptx")) {
322 indexResourceOOXML(r);
// Legacy Microsoft Office formats
325 if (r.getMime().equalsIgnoreCase("application/vsd") ||
326 r.getMime().equalsIgnoreCase("application/ppt") ||
327 r.getMime().equalsIgnoreCase("application/xls") ||
328 r.getMime().equalsIgnoreCase("application/msg") ||
329 r.getMime().equalsIgnoreCase("application/doc")) {
330 indexResourceOffice(r);
// RTF
333 if (r.getMime().equalsIgnoreCase("application/rtf")) {
// OpenDocument formats
337 if (r.getMime().equalsIgnoreCase("application/odf") ||
338 r.getMime().equalsIgnoreCase("application/odt") ||
339 r.getMime().equalsIgnoreCase("application/odp") ||
340 r.getMime().equalsIgnoreCase("application/odg") ||
341 r.getMime().equalsIgnoreCase("application/odb") ||
342 r.getMime().equalsIgnoreCase("application/ods")) {
/**
 * Extracts the text of an RTF attachment with Tika's RTFParser and adds each
 * word to the index under the resource's note GUID with type "RESOURCE".
 * The visible exception handlers only log the failure at LOW level.
 *
 * @param r the resource whose data is to be indexed
 */
350 private void indexResourceRTF(Resource r) {
352 Data d = r.getData();
// Loops at most 20 times while the data reports a size of 0.
353 for (int i=0; i<20 && d.getSize() == 0; i++)
// The data is written to a temporary file so it can be read back as a stream.
358 QTemporaryFile f = writeResource(d);
365 input = new FileInputStream(new File(f.fileName()));
// A write limit of -1 disables BodyContentHandler's character limit.
366 ContentHandler textHandler = new BodyContentHandler(-1);
367 Metadata metadata = new Metadata();
368 RTFParser parser = new RTFParser();
369 ParseContext context = new ParseContext();
370 parser.parse(input, textHandler, metadata, context);
// Split the extracted text with the shared word pattern and index each token.
371 String[] result = textHandler.toString().split(regex);
372 for (int i=0; i<result.length && keepRunning; i++) {
373 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
378 } catch (java.lang.ClassCastException e) {
379 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
380 } catch (FileNotFoundException e) {
381 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
382 } catch (IOException e) {
383 logger.log(logger.LOW, "IO exception: " +e.getMessage());
384 } catch (SAXException e) {
385 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
386 } catch (TikaException e) {
387 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
388 } catch (Exception e) {
389 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
// Errors are caught too, so a parser/library mismatch is logged rather than propagated.
390 } catch (java.lang.NoSuchMethodError e) {
391 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
393 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
/**
 * Extracts the text of an OpenDocument attachment with Tika's
 * OpenDocumentParser and adds each word to the index under the resource's note
 * GUID with type "RESOURCE". The visible exception handlers only log the
 * failure at LOW level.
 *
 * @param r the resource whose data is to be indexed
 */
398 private void indexResourceODF(Resource r) {
400 Data d = r.getData();
// Loops at most 20 times while the data reports a size of 0.
401 for (int i=0; i<20 && d.getSize() == 0; i++)
// The data is written to a temporary file so it can be read back as a stream.
405 QTemporaryFile f = writeResource(d);
412 input = new FileInputStream(new File(f.fileName()));
// A write limit of -1 disables BodyContentHandler's character limit.
413 ContentHandler textHandler = new BodyContentHandler(-1);
414 Metadata metadata = new Metadata();
415 OpenDocumentParser parser = new OpenDocumentParser();
416 ParseContext context = new ParseContext();
417 parser.parse(input, textHandler, metadata, context);
// Split the extracted text with the shared word pattern and index each token.
418 String[] result = textHandler.toString().split(regex);
419 for (int i=0; i<result.length && keepRunning; i++) {
423 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
428 } catch (java.lang.ClassCastException e) {
429 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
430 } catch (FileNotFoundException e) {
431 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
432 } catch (IOException e) {
433 logger.log(logger.LOW, "IO exception: " +e.getMessage());
434 } catch (SAXException e) {
435 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
436 } catch (TikaException e) {
437 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
438 } catch (Exception e) {
439 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
// Errors are caught too, so a parser/library mismatch is logged rather than propagated.
440 } catch (java.lang.NoSuchMethodError e) {
441 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
443 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
/**
 * Extracts the text of a legacy Microsoft Office attachment with Tika's
 * OfficeParser and adds each word to the index under the resource's note GUID
 * with type "RESOURCE". The visible exception handlers only log the failure at
 * LOW level.
 *
 * @param r the resource whose data is to be indexed
 */
448 private void indexResourceOffice(Resource r) {
450 Data d = r.getData();
// Loops at most 20 times while the data reports a size of 0.
451 for (int i=0; i<20 && d.getSize() == 0; i++)
// The data is written to a temporary file so it can be read back as a stream.
455 QTemporaryFile f = writeResource(d);
462 input = new FileInputStream(new File(f.fileName()));
// A write limit of -1 disables BodyContentHandler's character limit.
463 ContentHandler textHandler = new BodyContentHandler(-1);
464 Metadata metadata = new Metadata();
465 OfficeParser parser = new OfficeParser();
466 ParseContext context = new ParseContext();
467 parser.parse(input, textHandler, metadata, context);
// Split the extracted text with the shared word pattern and index each token.
468 String[] result = textHandler.toString().split(regex);
469 for (int i=0; i<result.length && keepRunning; i++) {
473 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
478 } catch (java.lang.ClassCastException e) {
479 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
480 } catch (FileNotFoundException e) {
481 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
482 } catch (IOException e) {
483 logger.log(logger.LOW, "IO exception: " +e.getMessage());
484 } catch (SAXException e) {
485 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
486 } catch (TikaException e) {
487 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
488 } catch (Exception e) {
489 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
// Errors are caught too, so a parser/library mismatch is logged rather than propagated.
490 } catch (java.lang.NoSuchMethodError e) {
491 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
493 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
/**
 * Extracts the text of a PDF attachment with Tika's PDFParser and adds each
 * word to the index under the resource's note GUID with type "RESOURCE".
 * The visible exception handlers only log the failure at LOW level.
 *
 * @param r the resource whose data is to be indexed
 */
499 private void indexResourcePDF(Resource r) {
501 Data d = r.getData();
// Loops at most 20 times while the data reports a size of 0.
502 for (int i=0; i<20 && d.getSize() == 0; i++)
// The data is written to a temporary file so it can be read back as a stream.
506 QTemporaryFile f = writeResource(d);
513 input = new FileInputStream(new File(f.fileName()));
// A write limit of -1 disables BodyContentHandler's character limit.
514 ContentHandler textHandler = new BodyContentHandler(-1);
515 Metadata metadata = new Metadata();
516 PDFParser parser = new PDFParser();
517 ParseContext context = new ParseContext();
518 parser.parse(input, textHandler, metadata, context);
// Split the extracted text with the shared word pattern and index each token.
519 String[] result = textHandler.toString().split(regex);
520 for (int i=0; i<result.length && keepRunning; i++) {
524 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
529 } catch (java.lang.ClassCastException e) {
530 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
531 } catch (FileNotFoundException e) {
532 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
533 } catch (IOException e) {
534 logger.log(logger.LOW, "IO exception: " +e.getMessage());
535 } catch (SAXException e) {
536 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
537 } catch (TikaException e) {
538 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
539 } catch (Exception e) {
540 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
// Errors are caught too, so a parser/library mismatch is logged rather than propagated.
541 } catch (java.lang.NoSuchMethodError e) {
542 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
544 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
/**
 * Extracts the text of an Office Open XML attachment with Tika's OOXMLParser
 * and adds each word to the index under the resource's note GUID with type
 * "RESOURCE". The visible exception handlers only log the failure at LOW level.
 *
 * @param r the resource whose data is to be indexed
 */
549 private void indexResourceOOXML(Resource r) {
551 Data d = r.getData();
// Loops at most 20 times while the data reports a size of 0.
552 for (int i=0; i<20 && d.getSize() == 0; i++)
// The data is written to a temporary file so it can be read back as a stream.
556 QTemporaryFile f = writeResource(d);
563 input = new FileInputStream(new File(f.fileName()));
// A write limit of -1 disables BodyContentHandler's character limit.
564 ContentHandler textHandler = new BodyContentHandler(-1);
565 Metadata metadata = new Metadata();
566 OOXMLParser parser = new OOXMLParser();
567 ParseContext context = new ParseContext();
568 parser.parse(input, textHandler, metadata, context);
// Split the extracted text with the shared word pattern and index each token.
569 String[] result = textHandler.toString().split(regex);
570 for (int i=0; i<result.length && keepRunning; i++) {
574 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
579 } catch (java.lang.ClassCastException e) {
580 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
581 } catch (FileNotFoundException e) {
582 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
583 } catch (IOException e) {
584 logger.log(logger.LOW, "IO exception: " +e.getMessage());
585 } catch (SAXException e) {
586 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
587 } catch (TikaException e) {
588 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
589 } catch (Exception e) {
590 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
// Errors are caught too, so a parser/library mismatch is logged rather than propagated.
591 } catch (java.lang.NoSuchMethodError e) {
592 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
594 logger.log(logger.LOW, "Unknown error: " +e.getMessage()); }
/**
 * Writes the body of the given resource data to a new temporary file.
 *
 * @param d resource data whose body bytes are written out
 * @return a QTemporaryFile (presumably the file just written - confirm against the full method)
 */
599 private QTemporaryFile writeResource(Data d) {
600 QTemporaryFile newFile = new QTemporaryFile();
// NOTE(review): the results of open() and write() are not checked in the lines shown.
601 newFile.open(OpenModeFlag.WriteOnly);
602 newFile.write(d.getBody());
608 private String removeEnCrypt(String content) {
609 int index = content.indexOf("<en-crypt");
611 boolean tagFound = true;
612 while (tagFound && keepRunning) {
616 endPos = content.indexOf("</en-crypt>", index)+11;
617 if (endPos > -1 && index > -1) {
618 content = content.substring(0,index)+content.substring(endPos);
619 index = content.indexOf("<en-crypt");
/**
 * Normalises a word and adds it to the word index for the given GUID, unless
 * the raw or the normalised form has already been seen (foundWords).
 *
 * @param guid GUID the word is stored under
 * @param word raw token as produced by splitting the text
 * @param type index type stored with the word (callers pass "CONTENT" or "RESOURCE")
 */
628 private void addToIndex(String guid, String word, String type) {
// Cheap early check on the raw token.
629 if (foundWords.contains(word))
631 StringBuffer buffer = new StringBuffer(word.toLowerCase());
// Pass from the end of the word: remove characters that are neither letters/digits
// nor listed in specialIndexCharacters.
632 for (int i=buffer.length()-1; i>=0; i--) {
633 if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
634 buffer.deleteCharAt(i);
// Reverse so the same end-first loop now works on what was the start of the word.
// NOTE(review): this pass does not consult specialIndexCharacters.
638 buffer = buffer.reverse();
639 for (int i=buffer.length()-1; i>=0; i--) {
640 if (!Character.isLetterOrDigit(buffer.charAt(i)))
641 buffer.deleteCharAt(i);
// Restore the original character order.
645 buffer = buffer.reverse();
646 if (buffer.length() > 0) {
647 // We have a good word, now let's trim off junk at the beginning or end
648 if (!foundWords.contains(buffer.toString())) {
// Remember both forms so neither is processed again.
649 foundWords.add(buffer.toString());
650 foundWords.add(word);
// Words added here are always stored with weight 100.
651 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
// Commit in batches once more than 100 rows are pending.
653 if (uncommittedCount > 100) {
654 conn.commitTransaction();
/**
 * Scans for notes and resources flagged as unindexed and processes them, then
 * removes index entries whose GUID no longer exists in the note table. Emits
 * indexStarted when there is something to index and indexFinished at the end
 * if indexing was started and the thread is still running.
 */
662 private void scanUnindexed() {
663 List<String> notes = conn.getNoteTable().getUnindexed();
// Tracks whether indexStarted has been emitted (presumably set to true after the emit - confirm).
665 boolean started = false;
666 if (notes.size() > 0) {
667 signal.indexStarted.emit();
// First pass: unindexed notes.
670 for (int i=0; i<notes.size() && keepRunning; i++) {
675 if (guid != null && keepRunning) {
// Second pass: unindexed resources.
680 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();
// Emit indexStarted here only if the notes pass did not already do so.
681 if (unindexedResources.size() > 0 && !started) {
682 signal.indexStarted.emit();
685 for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {
689 guid = unindexedResources.get(i);
695 // Cleanup stuff that was deleted at some point
696 List<String> guids = conn.getWordsTable().getGuidList();
697 logger.log(logger.LOW, "GUIDS in index: " +guids.size());
698 for (int i=0; i<guids.size() && keepRunning; i++) {
// Index rows whose GUID is no longer in the note table are expunged.
699 if (!conn.getNoteTable().exists(guids.get(i))) {
700 logger.log(logger.LOW, "Old GUID found: " +guids.get(i));
701 conn.getWordsTable().expunge(guids.get(i));
// Do not report completion if the run was cut short.
705 if (started && keepRunning)
706 signal.indexFinished.emit();
/**
 * Flags the note identified by the current guid as needing to be indexed again.
 */
709 private void reindexNote() {
712 conn.getNoteTable().setIndexNeeded(guid, true);
/**
 * Requests a reindex of everything by calling the bulk reindex operation on
 * both the note table and the note resource table.
 */
715 private void reindexAll() {
716 conn.getNoteTable().reindexAllNotes();
717 conn.getNoteTable().noteResourceTable.reindexAll();
720 private void waitSeconds(int len) {
721 long starttime = 0; // variable declared
723 // for the first time, remember the timestamp
724 starttime = System.currentTimeMillis();
725 // the next timestamp we want to wake up
726 starttime += (1000.0);
727 // Wait until the desired next time arrives using nanosecond
728 // accuracy timer (wait(time) isn't accurate enough on most platforms)
729 LockSupport.parkNanos((Math.max(0,
730 starttime - System.currentTimeMillis()) * 1000000));
/**
 * Commits the pending index writes, resets the uncommitted-row counter and
 * opens a new transaction so that indexing can continue.
 */
733 private void processInterrupt() {
734 conn.commitTransaction();
736 uncommittedCount = 0;
737 conn.beginTransaction();