ローカル添付ファイルもApache Luceneを使った全文検索の対象に追加。

[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
diff --git a/src/cx/fbn/nevernote/threads/IndexRunner.java b/src/cx/fbn/nevernote/threads/IndexRunner.java

index 5eb6e1b..ad26cd6 100644 (file)
--- a/src/cx/fbn/nevernote/threads/IndexRunner.java
+++ b/src/cx/fbn/nevernote/threads/IndexRunner.java
@@ -1,6 +1,7 @@
  /*
- * This file is part of NixNote 
+ * This file is part of NixNote/NeighborNote 
   * Copyright 2009 Randy Baumgarte
+ * Copyright 2013 Yuki Takahashi
   * 
   * This file may be licensed under the terms of of the
   * GNU General Public License Version 2 (the ``GPL'').
@@ -29,7 +30,6 @@ import java.util.TreeSet;
  import java.util.concurrent.LinkedBlockingQueue;
  import java.util.concurrent.locks.LockSupport;
  
-import org.apache.commons.lang3.StringEscapeUtils;
  import org.apache.tika.exception.TikaException;
  import org.apache.tika.metadata.Metadata;
  import org.apache.tika.parser.ParseContext;
@@ -43,7 +43,6 @@ import org.xml.sax.ContentHandler;
  import org.xml.sax.SAXException;
  
  import com.evernote.edam.type.Data;
-import com.evernote.edam.type.Note;
  import com.evernote.edam.type.Resource;
  import com.trolltech.qt.core.QByteArray;
  import com.trolltech.qt.core.QIODevice.OpenModeFlag;
@@ -73,10 +72,10 @@ public class IndexRunner extends QObject implements Runnable {
         public final int                                        REINDEXNOTE=3;
         public boolean                                          keepRunning;
         private final QDomDocument                      doc;
-       private static String                           regex = Global.getWordRegex();
-       public String                                           specialIndexCharacters = "";
-       public boolean                                          indexNoteBody = true;
-       public boolean                                          indexNoteTitle = true;
+//     private static String                           regex = Global.getWordRegex();
+//     public String                                           specialIndexCharacters = "";
+//     public boolean                                          indexNoteBody = true;
+//     public boolean                                          indexNoteTitle = true;
         public boolean                                          indexImageRecognition = true;
         private final DatabaseConnection        conn;
         private volatile LinkedBlockingQueue<String> workQueue;
@@ -162,62 +161,62 @@ public class IndexRunner extends QObject implements Runnable {
         }
         
         // Reindex a note
-       public void indexNoteContent() {
-               foundWords.clear();
-               
-               logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
-               
-               logger.log(logger.EXTREME, "Getting note content");
-               Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
-               String data;
-               if (indexNoteBody) {
-                       data = n.getContent();
-                       data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
-               
-                       logger.log(logger.EXTREME, "Removing any encrypted data");
-                       data = removeEnCrypt(data.toString());
-                       logger.log(logger.EXTREME, "Removing xml markups");
-               } else
-                       data = "";
-               String text;
-               if (indexNoteTitle)
-                       text =  removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
-               else
-                       text = removeTags(StringEscapeUtils.unescapeHtml4(data));
-                               
-               logger.log(logger.EXTREME, "Splitting words");
-               String[] result = text.toString().split(regex);
-               conn.commitTransaction();
-               conn.beginTransaction();
-               logger.log(logger.EXTREME, "Deleting existing words for note from index");
-               conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
-               
-               logger.log(logger.EXTREME, "Number of words found: " +result.length);
-               for (int j=0; j<result.length && keepRunning; j++) {
-                       if (interrupt) {
-                               processInterrupt();
-                       }
-                       if (!result[j].trim().equals("")) {
-                               logger.log(logger.EXTREME, "Result word: " +result[j].trim());
-                               addToIndex(guid, result[j], "CONTENT");
-                       }
-               }
-               
-               // Add tags
-               for (int j=0; j<n.getTagNamesSize(); j++) {
-                       if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
-                               addToIndex(guid, n.getTagNames().get(j), "CONTENT");
-               }
-               
-               // If we were interrupted, we will reindex this note next time
-               if (Global.keepRunning) {
-                       logger.log(logger.EXTREME, "Resetting note guid needed");
-                       conn.getNoteTable().setIndexNeeded(guid, false);
-               } 
-               conn.commitTransaction();
-               uncommittedCount = 0;
-               logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
-       }
+//     public void indexNoteContent() {
+//             foundWords.clear();
+//             
+//             logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
+//             
+//             logger.log(logger.EXTREME, "Getting note content");
+//             Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
+//             String data;
+//             if (indexNoteBody) {
+//                     data = n.getContent();
+//                     data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());
+//             
+//                     logger.log(logger.EXTREME, "Removing any encrypted data");
+//                     data = removeEnCrypt(data.toString());
+//                     logger.log(logger.EXTREME, "Removing xml markups");
+//             } else
+//                     data = "";
+//             String text;
+//             if (indexNoteTitle)
+//                     text =  removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());
+//             else
+//                     text = removeTags(StringEscapeUtils.unescapeHtml4(data));
+//                             
+//             logger.log(logger.EXTREME, "Splitting words");
+//             String[] result = text.toString().split(regex);
+//             conn.commitTransaction();
+//             conn.beginTransaction();
+//             logger.log(logger.EXTREME, "Deleting existing words for note from index");
+//             conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
+//             
+//             logger.log(logger.EXTREME, "Number of words found: " +result.length);
+//             for (int j=0; j<result.length && keepRunning; j++) {
+//                     if (interrupt) {
+//                             processInterrupt();
+//                     }
+//                     if (!result[j].trim().equals("")) {
+//                             logger.log(logger.EXTREME, "Result word: " +result[j].trim());
+//                             addToIndex(guid, result[j], "CONTENT");
+//                     }
+//             }
+//             
+//             // Add tags
+//             for (int j=0; j<n.getTagNamesSize(); j++) {
+//                     if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))
+//                             addToIndex(guid, n.getTagNames().get(j), "CONTENT");
+//             }
+//             
+//             // If we were interrupted, we will reindex this note next time
+//             if (Global.keepRunning) {
+//                     logger.log(logger.EXTREME, "Resetting note guid needed");
+//                     conn.getNoteTable().setIndexNeeded(guid, false);
+//             } 
+//             conn.commitTransaction();
+//             uncommittedCount = 0;
+//             logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
+//     }
         
         
         private String removeTags(String text) {
@@ -367,10 +366,11 @@ public class IndexRunner extends QObject implements Runnable {
                         RTFParser parser = new RTFParser();     
                         ParseContext context = new ParseContext();
                         parser.parse(input, textHandler, metadata, context);
-                       String[] result = textHandler.toString().split(regex);
-                       for (int i=0; i<result.length && keepRunning; i++) {
-                               addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
-                       }
+//                     String[] result = textHandler.toString().split(regex);
+//                     for (int i=0; i<result.length && keepRunning; i++) {
+//                             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+//                     }
+                       updateResourceText(r.getGuid(), textHandler.toString());
                         input.close();
                 
                         f.close();
@@ -414,13 +414,14 @@ public class IndexRunner extends QObject implements Runnable {
                         OpenDocumentParser parser = new OpenDocumentParser();   
                         ParseContext context = new ParseContext();
                         parser.parse(input, textHandler, metadata, context);
-                       String[] result = textHandler.toString().split(regex);
-                       for (int i=0; i<result.length && keepRunning; i++) {
-                               if (interrupt) {
-                                       processInterrupt();
-                               }
-                               addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
-                       }
+//                     String[] result = textHandler.toString().split(regex);
+//                     for (int i=0; i<result.length && keepRunning; i++) {
+//                             if (interrupt) {
+//                                     processInterrupt();
+//                             }
+//                             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+//                     }
+                       updateResourceText(r.getGuid(), textHandler.toString());
                         input.close();
                 
                         f.close();
@@ -464,13 +465,14 @@ public class IndexRunner extends QObject implements Runnable {
                         OfficeParser parser = new OfficeParser();       
                         ParseContext context = new ParseContext();
                         parser.parse(input, textHandler, metadata, context);
-                       String[] result = textHandler.toString().split(regex);
-                       for (int i=0; i<result.length && keepRunning; i++) {
-                               if (interrupt) {
-                                       processInterrupt();
-                               }
-                               addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
-                       }
+//                     String[] result = textHandler.toString().split(regex);
+//                     for (int i=0; i<result.length && keepRunning; i++) {
+//                             if (interrupt) {
+//                                     processInterrupt();
+//                             }
+//                             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+//                     }
+                       updateResourceText(r.getGuid(), textHandler.toString());
                         input.close();
                 
                         f.close();
@@ -515,13 +517,14 @@ public class IndexRunner extends QObject implements Runnable {
                         PDFParser parser = new PDFParser();     
                         ParseContext context = new ParseContext();
                         parser.parse(input, textHandler, metadata, context);
-                       String[] result = textHandler.toString().split(regex);
-                       for (int i=0; i<result.length && keepRunning; i++) {
-                               if (interrupt) {
-                                       processInterrupt();
-                               }
-                               addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
-                       }
+//                     String[] result = textHandler.toString().split(regex);
+//                     for (int i=0; i<result.length && keepRunning; i++) {
+//                             if (interrupt) {
+//                                     processInterrupt();
+//                             }
+//                             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+//                     }
+                       updateResourceText(r.getGuid(), textHandler.toString());
                         input.close();
                 
                         f.close();
@@ -565,13 +568,14 @@ public class IndexRunner extends QObject implements Runnable {
                         OOXMLParser parser = new OOXMLParser(); 
                         ParseContext context = new ParseContext();
                         parser.parse(input, textHandler, metadata, context);
-                       String[] result = textHandler.toString().split(regex);
-                       for (int i=0; i<result.length && keepRunning; i++) {
-                               if (interrupt) {
-                                       processInterrupt();
-                               }
-                               addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
-                       }
+//                     String[] result = textHandler.toString().split(regex);
+//                     for (int i=0; i<result.length && keepRunning; i++) {
+//                             if (interrupt) {
+//                                     processInterrupt();
+//                             }
+//                             addToIndex(r.getNoteGuid(), result[i], "RESOURCE");
+//                     }
+                       updateResourceText(r.getGuid(), textHandler.toString());
                         input.close();
                 
                         f.close();
@@ -624,57 +628,62 @@ public class IndexRunner extends QObject implements Runnable {
         }
  
         
-       private void addToIndex(String guid, String word, String type) {
-               if (foundWords.contains(word))
-                       return;
-               StringBuffer buffer = new StringBuffer(word.toLowerCase());
-               for (int i=buffer.length()-1; i>=0; i--) {
-                       if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
-                               buffer.deleteCharAt(i);
-                       else
-                               break;
-               }
-               buffer = buffer.reverse();
-               for (int i=buffer.length()-1; i>=0; i--) {
-                       if (!Character.isLetterOrDigit(buffer.charAt(i)))
-                               buffer.deleteCharAt(i);
-                       else
-                               break;
-               }
-               buffer = buffer.reverse();
-               if (buffer.length() > 0) {
-                       // We have a good word, now let's trim off junk at the beginning or end
-                       if (!foundWords.contains(buffer.toString())) {
-                               foundWords.add(buffer.toString());
-                               foundWords.add(word);
-                               conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
-                               uncommittedCount++;
-                               if (uncommittedCount > 100) {
-                                       conn.commitTransaction();
-                                       uncommittedCount=0;
-                               }
-                       }
-               }
-               return;
+//     private void addToIndex(String guid, String word, String type) {
+//             if (foundWords.contains(word))
+//                     return;
+//             StringBuffer buffer = new StringBuffer(word.toLowerCase());
+//             for (int i=buffer.length()-1; i>=0; i--) {
+//                     if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)
+//                             buffer.deleteCharAt(i);
+//                     else
+//                             break;
+//             }
+//             buffer = buffer.reverse();
+//             for (int i=buffer.length()-1; i>=0; i--) {
+//                     if (!Character.isLetterOrDigit(buffer.charAt(i)))
+//                             buffer.deleteCharAt(i);
+//                     else
+//                             break;
+//             }
+//             buffer = buffer.reverse();
+//             if (buffer.length() > 0) {
+//                     // We have a good word, now let's trim off junk at the beginning or end
+//                     if (!foundWords.contains(buffer.toString())) {
+//                             foundWords.add(buffer.toString());
+//                             foundWords.add(word);
+//                             conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);
+//                             uncommittedCount++;
+//                             if (uncommittedCount > 100) {
+//                                     conn.commitTransaction();
+//                                     uncommittedCount=0;
+//                             }
+//                     }
+//             }
+//             return;
+//     }
+       
+       // Update the resource text in the note resource table for the given resource guid
+       private void updateResourceText(String guid, String text) {
+               conn.getNoteTable().noteResourceTable.updateResourceText(guid, text);
         }
         
         private void scanUnindexed() {
-               List<String> notes = conn.getNoteTable().getUnindexed();
+//             List<String> notes = conn.getNoteTable().getUnindexed();
                 guid = null;
                 boolean started = false;
-               if (notes.size() > 0) {
-                       signal.indexStarted.emit();
-                       started = true;
-               }
-               for (int i=0; i<notes.size() && keepRunning; i++) {
-                       if (interrupt) {
-                               processInterrupt();
-                       }
-                       guid = notes.get(i);
-                       if (guid != null && keepRunning) {
-                               indexNoteContent();
-                       }
-               }
+//             if (notes.size() > 0) {
+//                     signal.indexStarted.emit();
+//                     started = true;
+//             }
+//             for (int i=0; i<notes.size() && keepRunning; i++) {
+//                     if (interrupt) {
+//                             processInterrupt();
+//                     }
+//                     guid = notes.get(i);
+//                     if (guid != null && keepRunning) {
+//                             indexNoteContent();
+//                     }
+//             }
                 
                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();
                 if (unindexedResources.size() > 0 && !started) {