2 * This file is part of NixNote/NeighborNote
3 * Copyright 2009 Randy Baumgarte
4 * Copyright 2013 Yuki Takahashi
6 * This file may be licensed under the terms of the
7 * GNU General Public License Version 2 (the ``GPL'').
9 * Software distributed under the License is distributed
10 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
11 * express or implied. See the GPL for the specific language
12 * governing rights and limitations.
14 * You should have received a copy of the GPL along with this
15 * program. If not, go to http://www.gnu.org/licenses/gpl.html
16 * or write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 package cx.fbn.nevernote.threads;
24 import java.io.FileInputStream;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.util.List;
29 import java.util.TreeSet;
30 import java.util.concurrent.LinkedBlockingQueue;
31 import java.util.concurrent.locks.LockSupport;
33 import org.apache.tika.exception.TikaException;
34 import org.apache.tika.metadata.Metadata;
35 import org.apache.tika.parser.ParseContext;
36 import org.apache.tika.parser.microsoft.OfficeParser;
37 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
38 import org.apache.tika.parser.odf.OpenDocumentParser;
39 import org.apache.tika.parser.pdf.PDFParser;
40 import org.apache.tika.parser.rtf.RTFParser;
41 import org.apache.tika.sax.BodyContentHandler;
42 import org.xml.sax.ContentHandler;
43 import org.xml.sax.SAXException;
45 import com.evernote.edam.type.Data;
46 import com.evernote.edam.type.Resource;
47 import com.trolltech.qt.core.QByteArray;
48 import com.trolltech.qt.core.QIODevice.OpenModeFlag;
49 import com.trolltech.qt.core.QObject;
50 import com.trolltech.qt.core.QTemporaryFile;
51 import com.trolltech.qt.xml.QDomDocument;
52 import com.trolltech.qt.xml.QDomElement;
53 import com.trolltech.qt.xml.QDomNodeList;
55 import cx.fbn.nevernote.Global;
56 import cx.fbn.nevernote.signals.IndexSignal;
57 import cx.fbn.nevernote.signals.NoteResourceSignal;
58 import cx.fbn.nevernote.signals.NoteSignal;
59 import cx.fbn.nevernote.sql.DatabaseConnection;
60 import cx.fbn.nevernote.utilities.ApplicationLogger;
62 public class IndexRunner extends QObject implements Runnable {
64 private final ApplicationLogger logger;
66 private QByteArray resourceBinary;
67 public volatile NoteSignal noteSignal;
68 public volatile NoteResourceSignal resourceSignal;
69 private int indexType;
70 public final int SCAN=1;
71 public final int REINDEXALL=2;
72 public final int REINDEXNOTE=3;
73 public boolean keepRunning;
74 private final QDomDocument doc;
75 // private static String regex = Global.getWordRegex();
76 // public String specialIndexCharacters = "";
77 // public boolean indexNoteBody = true;
78 // public boolean indexNoteTitle = true;
79 public boolean indexImageRecognition = true;
80 private final DatabaseConnection conn;
81 private volatile LinkedBlockingQueue<String> workQueue;
82 private static int MAX_QUEUED_WAITING = 1000;
83 public boolean interrupt;
85 public boolean indexAttachmentsLocally = true;
86 public volatile IndexSignal signal;
87 private final TreeSet<String> foundWords;
88 int uncommittedCount = 0;
90 // ICHANGED String bを追加
91 public IndexRunner(String logname, String u, String i, String r, String b, String uid, String pswd, String cpswd) {
92 foundWords = new TreeSet<String>();
93 logger = new ApplicationLogger(logname);
95 conn = new DatabaseConnection(logger, u, i, r, b, uid, pswd, cpswd, 500);
99 doc = new QDomDocument();
100 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);
103 public void setIndexType(int t) {
110 thread().setPriority(Thread.MIN_PRIORITY);
111 noteSignal = new NoteSignal();
112 resourceSignal = new NoteResourceSignal();
113 signal = new IndexSignal();
114 logger.log(logger.EXTREME, "Starting index thread ");
115 while (keepRunning) {
118 conn.commitTransaction();
119 uncommittedCount = 0;
120 String work = workQueue.take();
122 if (work.startsWith("SCAN")) {
127 if (work.startsWith("REINDEXALL")) {
129 indexType=REINDEXALL;
131 if (work.startsWith("REINDEXNOTE")) {
132 work = work.replace("REINDEXNOTE ", "");
134 indexType = REINDEXNOTE;
136 if (work.startsWith("STOP")) {
140 logger.log(logger.EXTREME, "Type:" +indexType);
141 if (indexType == SCAN && keepRunning) {
142 logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");
146 if (indexType == REINDEXALL && keepRunning) {
147 logger.log(logger.MEDIUM, "Marking all for reindex");
151 if (indexType == REINDEXNOTE && keepRunning) {
154 } catch (InterruptedException e) {
155 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());
158 logger.log(logger.EXTREME, "Shutting down database");
160 logger.log(logger.EXTREME, "Database shut down. Exiting thread");
164 // public void indexNoteContent() {
165 // foundWords.clear();
167 // logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
169 // logger.log(logger.EXTREME, "Getting note content");
170 // Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
172 // if (indexNoteBody) {
173 // data = n.getContent();
174 // data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
176 // logger.log(logger.EXTREME, "Removing any encrypted data");
177 // data = removeEnCrypt(data.toString());
178 // logger.log(logger.EXTREME, "Removing xml markups");
182 // if (indexNoteTitle)
183 // text = removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
185 // text = removeTags(StringEscapeUtils.unescapeHtml4(data));
187 // logger.log(logger.EXTREME, "Splitting words");
188 // String[] result = text.toString().split(regex);
189 // conn.commitTransaction();
190 // conn.beginTransaction();
191 // logger.log(logger.EXTREME, "Deleting existing words for note from index");
192 // conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
194 // logger.log(logger.EXTREME, "Number of words found: " +result.length);
195 // for (int j=0; j<result.length && keepRunning; j++) {
197 // processInterrupt();
199 // if (!result[j].trim().equals("")) {
200 // logger.log(logger.EXTREME, "Result word: " +result[j].trim());
201 // addToIndex(guid, result[j], "CONTENT");
206 // for (int j=0; j<n.getTagNamesSize(); j++) {
207 // if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
208 // addToIndex(guid, n.getTagNames().get(j), "CONTENT");
211 // // If we were interrupted, we will reindex this note next time
212 // if (Global.keepRunning) {
213 // logger.log(logger.EXTREME, "Resetting note guid needed");
214 // conn.getNoteTable().setIndexNeeded(guid, false);
216 // conn.commitTransaction();
217 // uncommittedCount = 0;
218 // logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
222 private String removeTags(String text) {
223 StringBuffer buffer = new StringBuffer(text);
224 boolean inTag = false;
225 for (int i=buffer.length()-1; i>=0; i--) {
226 if (buffer.charAt(i) == '>')
228 if (buffer.charAt(i) == '<')
230 if (inTag || buffer.charAt(i) == '<')
231 buffer.deleteCharAt(i);
234 return buffer.toString();
238 public synchronized boolean addWork(String request) {
239 if (workQueue.size() == 0) {
240 workQueue.offer(request);
246 public synchronized int getWorkQueueSize() {
247 return workQueue.size();
250 public void indexResource() {
255 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);
256 if (!indexImageRecognition ||
257 r == null || r.getRecognition() == null ||
258 r.getRecognition().getBody() == null ||
259 r.getRecognition().getBody().length == 0)
260 resourceBinary = new QByteArray(" ");
262 resourceBinary = new QByteArray(r.getRecognition().getBody());
264 conn.commitTransaction();
265 conn.beginTransaction();
266 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");
267 // This is due to an old bug & can be removed at some point in the future 11/23/2010
268 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");
269 conn.commitTransaction();
270 uncommittedCount = 0;
271 conn.beginTransaction();
273 doc.setContent(resourceBinary);
274 QDomElement docElem = doc.documentElement();
276 // look for text tags
277 QDomNodeList anchors = docElem.elementsByTagName("t");
278 for (int i=0; i<anchors.length() && keepRunning; i++) {
284 QDomElement enmedia = anchors.at(i).toElement();
285 String weight = new String(enmedia.attribute("w"));
286 String text = new String(enmedia.text()).toLowerCase();
287 if (!text.equals("")) {
288 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));
290 if (uncommittedCount > 100) {
291 conn.commitTransaction();
297 if (Global.keepRunning && indexAttachmentsLocally) {
298 conn.commitTransaction();
299 uncommittedCount = 0;
300 conn.beginTransaction();
301 indexResourceContent(guid);
304 if (Global.keepRunning)
305 conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);
306 conn.commitTransaction();
307 uncommittedCount = 0;
310 private void indexResourceContent(String guid) {
311 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);
312 if (r != null && r.getMime() != null) {
313 if (r.getMime().equalsIgnoreCase("application/pdf")) {
317 if (r.getMime().equalsIgnoreCase("application/docx") ||
318 r.getMime().equalsIgnoreCase("application/xlsx") ||
319 r.getMime().equalsIgnoreCase("application/pptx")) {
320 indexResourceOOXML(r);
323 if (r.getMime().equalsIgnoreCase("application/vsd") ||
324 r.getMime().equalsIgnoreCase("application/ppt") ||
325 r.getMime().equalsIgnoreCase("application/xls") ||
326 r.getMime().equalsIgnoreCase("application/msg") ||
327 r.getMime().equalsIgnoreCase("application/doc")) {
328 indexResourceOffice(r);
331 if (r.getMime().equalsIgnoreCase("application/rtf")) {
335 if (r.getMime().equalsIgnoreCase("application/odf") ||
336 r.getMime().equalsIgnoreCase("application/odt") ||
337 r.getMime().equalsIgnoreCase("application/odp") ||
338 r.getMime().equalsIgnoreCase("application/odg") ||
339 r.getMime().equalsIgnoreCase("application/odb") ||
340 r.getMime().equalsIgnoreCase("application/ods")) {
348 private void indexResourceRTF(Resource r) {
350 Data d = r.getData();
351 for (int i=0; i<20 && d.getSize() == 0; i++)
356 QTemporaryFile f = writeResource(d);
363 input = new FileInputStream(new File(f.fileName()));
364 ContentHandler textHandler = new BodyContentHandler(-1);
365 Metadata metadata = new Metadata();
366 RTFParser parser = new RTFParser();
367 ParseContext context = new ParseContext();
368 parser.parse(input, textHandler, metadata, context);
369 // String[] result = textHandler.toString().split(regex);
370 // for (int i=0; i<result.length && keepRunning; i++) {
371 // addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
373 updateResourceText(r.getGuid(), textHandler.toString());
377 } catch (java.lang.ClassCastException e) {
378 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
379 } catch (FileNotFoundException e) {
380 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
381 } catch (IOException e) {
382 logger.log(logger.LOW, "IO exception: " +e.getMessage());
383 } catch (SAXException e) {
384 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
385 } catch (TikaException e) {
386 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
387 } catch (Exception e) {
388 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
389 } catch (java.lang.NoSuchMethodError e) {
390 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
392 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
397 private void indexResourceODF(Resource r) {
399 Data d = r.getData();
400 for (int i=0; i<20 && d.getSize() == 0; i++)
404 QTemporaryFile f = writeResource(d);
411 input = new FileInputStream(new File(f.fileName()));
412 ContentHandler textHandler = new BodyContentHandler(-1);
413 Metadata metadata = new Metadata();
414 OpenDocumentParser parser = new OpenDocumentParser();
415 ParseContext context = new ParseContext();
416 parser.parse(input, textHandler, metadata, context);
417 // String[] result = textHandler.toString().split(regex);
418 // for (int i=0; i<result.length && keepRunning; i++) {
420 // processInterrupt();
422 // addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
424 updateResourceText(r.getGuid(), textHandler.toString());
428 } catch (java.lang.ClassCastException e) {
429 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
430 } catch (FileNotFoundException e) {
431 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
432 } catch (IOException e) {
433 logger.log(logger.LOW, "IO exception: " +e.getMessage());
434 } catch (SAXException e) {
435 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
436 } catch (TikaException e) {
437 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
438 } catch (Exception e) {
439 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
440 } catch (java.lang.NoSuchMethodError e) {
441 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
443 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
448 private void indexResourceOffice(Resource r) {
450 Data d = r.getData();
451 for (int i=0; i<20 && d.getSize() == 0; i++)
455 QTemporaryFile f = writeResource(d);
462 input = new FileInputStream(new File(f.fileName()));
463 ContentHandler textHandler = new BodyContentHandler(-1);
464 Metadata metadata = new Metadata();
465 OfficeParser parser = new OfficeParser();
466 ParseContext context = new ParseContext();
467 parser.parse(input, textHandler, metadata, context);
468 // String[] result = textHandler.toString().split(regex);
469 // for (int i=0; i<result.length && keepRunning; i++) {
471 // processInterrupt();
473 // addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
475 updateResourceText(r.getGuid(), textHandler.toString());
479 } catch (java.lang.ClassCastException e) {
480 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
481 } catch (FileNotFoundException e) {
482 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
483 } catch (IOException e) {
484 logger.log(logger.LOW, "IO exception: " +e.getMessage());
485 } catch (SAXException e) {
486 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
487 } catch (TikaException e) {
488 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
489 } catch (Exception e) {
490 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
491 } catch (java.lang.NoSuchMethodError e) {
492 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
494 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
500 private void indexResourcePDF(Resource r) {
502 Data d = r.getData();
503 for (int i=0; i<20 && d.getSize() == 0; i++)
507 QTemporaryFile f = writeResource(d);
514 input = new FileInputStream(new File(f.fileName()));
515 ContentHandler textHandler = new BodyContentHandler(-1);
516 Metadata metadata = new Metadata();
517 PDFParser parser = new PDFParser();
518 ParseContext context = new ParseContext();
519 parser.parse(input, textHandler, metadata, context);
520 // String[] result = textHandler.toString().split(regex);
521 // for (int i=0; i<result.length && keepRunning; i++) {
523 // processInterrupt();
525 // addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
527 updateResourceText(r.getGuid(), textHandler.toString());
531 } catch (java.lang.ClassCastException e) {
532 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
533 } catch (FileNotFoundException e) {
534 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
535 } catch (IOException e) {
536 logger.log(logger.LOW, "IO exception: " +e.getMessage());
537 } catch (SAXException e) {
538 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
539 } catch (TikaException e) {
540 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
541 } catch (Exception e) {
542 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
543 } catch (java.lang.NoSuchMethodError e) {
544 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
546 logger.log(logger.LOW, "Unknown error: " +e.getMessage());
551 private void indexResourceOOXML(Resource r) {
553 Data d = r.getData();
554 for (int i=0; i<20 && d.getSize() == 0; i++)
558 QTemporaryFile f = writeResource(d);
565 input = new FileInputStream(new File(f.fileName()));
566 ContentHandler textHandler = new BodyContentHandler(-1);
567 Metadata metadata = new Metadata();
568 OOXMLParser parser = new OOXMLParser();
569 ParseContext context = new ParseContext();
570 parser.parse(input, textHandler, metadata, context);
571 // String[] result = textHandler.toString().split(regex);
572 // for (int i=0; i<result.length && keepRunning; i++) {
574 // processInterrupt();
576 // addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
578 updateResourceText(r.getGuid(), textHandler.toString());
582 } catch (java.lang.ClassCastException e) {
583 logger.log(logger.LOW, "Cast exception: " +e.getMessage());
584 } catch (FileNotFoundException e) {
585 logger.log(logger.LOW, "FileNotFound exception: " +e.getMessage());
586 } catch (IOException e) {
587 logger.log(logger.LOW, "IO exception: " +e.getMessage());
588 } catch (SAXException e) {
589 logger.log(logger.LOW, "SAX exception: " +e.getMessage());
590 } catch (TikaException e) {
591 logger.log(logger.LOW, "Tika exception: " +e.getMessage());
592 } catch (Exception e) {
593 logger.log(logger.LOW, "Unknown exception: " +e.getMessage());
594 } catch (java.lang.NoSuchMethodError e) {
595 logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());
597 logger.log(logger.LOW, "Unknown error: " +e.getMessage()); }
602 private QTemporaryFile writeResource(Data d) {
603 QTemporaryFile newFile = new QTemporaryFile();
604 newFile.open(OpenModeFlag.WriteOnly);
605 newFile.write(d.getBody());
611 private String removeEnCrypt(String content) {
612 int index = content.indexOf("<en-crypt");
614 boolean tagFound = true;
615 while (tagFound && keepRunning) {
619 endPos = content.indexOf("</en-crypt>", index)+11;
620 if (endPos > -1 && index > -1) {
621 content = content.substring(0,index)+content.substring(endPos);
622 index = content.indexOf("<en-crypt");
631 // private void addToIndex(String guid, String word, String type) {
632 // if (foundWords.contains(word))
634 // StringBuffer buffer = new StringBuffer(word.toLowerCase());
635 // for (int i=buffer.length()-1; i>=0; i--) {
636 // if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
637 // buffer.deleteCharAt(i);
641 // buffer = buffer.reverse();
642 // for (int i=buffer.length()-1; i>=0; i--) {
643 // if (!Character.isLetterOrDigit(buffer.charAt(i)))
644 // buffer.deleteCharAt(i);
648 // buffer = buffer.reverse();
649 // if (buffer.length() > 0) {
650 // // We have a good word, now let's trim off junk at the beginning or end
651 // if (!foundWords.contains(buffer.toString())) {
652 // foundWords.add(buffer.toString());
653 // foundWords.add(word);
654 // conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
655 // uncommittedCount++;
656 // if (uncommittedCount > 100) {
657 // conn.commitTransaction();
658 // uncommittedCount=0;
665 // ノートリソーステーブルのリソーステキストに追加
666 private void updateResourceText(String guid, String text) {
667 conn.getNoteTable().noteResourceTable.updateResourceText(guid, text);
670 private void scanUnindexed() {
671 // List<String> notes = conn.getNoteTable().getUnindexed();
673 boolean started = false;
674 // if (notes.size() > 0) {
675 // signal.indexStarted.emit();
678 // for (int i=0; i<notes.size() && keepRunning; i++) {
680 // processInterrupt();
682 // guid = notes.get(i);
683 // if (guid != null && keepRunning) {
684 // indexNoteContent();
688 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();
689 if (unindexedResources.size() > 0 && !started) {
690 signal.indexStarted.emit();
693 for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {
697 guid = unindexedResources.get(i);
703 // Cleanup stuff that was deleted at some point
704 List<String> guids = conn.getWordsTable().getGuidList();
705 logger.log(logger.LOW, "GUIDS in index: " +guids.size());
706 for (int i=0; i<guids.size() && keepRunning; i++) {
707 if (!conn.getNoteTable().exists(guids.get(i))) {
708 logger.log(logger.LOW, "Old GUID found: " +guids.get(i));
709 conn.getWordsTable().expunge(guids.get(i));
713 if (started && keepRunning)
714 signal.indexFinished.emit();
717 private void reindexNote() {
720 conn.getNoteTable().setIndexNeeded(guid, true);
723 private void reindexAll() {
724 conn.getNoteTable().reindexAllNotes();
725 conn.getNoteTable().noteResourceTable.reindexAll();
728 private void waitSeconds(int len) {
729 long starttime = 0; // variable declared
731 // for the first time, remember the timestamp
732 starttime = System.currentTimeMillis();
733 // the next timestamp we want to wake up
734 starttime += (1000.0);
735 // Wait until the desired next time arrives using nanosecond
736 // accuracy timer (wait(time) isn't accurate enough on most platforms)
737 LockSupport.parkNanos((Math.max(0,
738 starttime - System.currentTimeMillis()) * 1000000));
741 private void processInterrupt() {
742 conn.commitTransaction();
744 uncommittedCount = 0;
745 conn.beginTransaction();