OSDN Git Service

Test for adding tag names to the search index (currently commented out).
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
index e0a122c..3c63f5d 100644 (file)
@@ -1,5 +1,5 @@
 /*\r
- * This file is part of NeverNote \r
+ * This file is part of NixNote \r
  * Copyright 2009 Randy Baumgarte\r
  * \r
  * This file may be licensed under the terms of the\r
 \r
 package cx.fbn.nevernote.threads;\r
 \r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
 import java.io.File;\r
 import java.io.FileInputStream;\r
 import java.io.FileNotFoundException;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
+import java.util.List;\r
+import java.util.TreeSet;\r
 import java.util.concurrent.LinkedBlockingQueue;\r
+import java.util.concurrent.locks.LockSupport;\r
 \r
-import org.apache.commons.lang.StringEscapeUtils;\r
+import org.apache.commons.lang3.StringEscapeUtils;\r
 import org.apache.tika.exception.TikaException;\r
 import org.apache.tika.metadata.Metadata;\r
 import org.apache.tika.parser.ParseContext;\r
 import org.apache.tika.parser.microsoft.OfficeParser;\r
 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
-import org.apache.tika.parser.odf.OpenDocumentContentParser;\r
+import org.apache.tika.parser.odf.OpenDocumentParser;\r
 import org.apache.tika.parser.pdf.PDFParser;\r
 import org.apache.tika.parser.rtf.RTFParser;\r
 import org.apache.tika.sax.BodyContentHandler;\r
-import org.w3c.tidy.Tidy;\r
 import org.xml.sax.ContentHandler;\r
 import org.xml.sax.SAXException;\r
 \r
@@ -54,6 +54,7 @@ import com.trolltech.qt.xml.QDomElement;
 import com.trolltech.qt.xml.QDomNodeList;\r
 \r
 import cx.fbn.nevernote.Global;\r
+import cx.fbn.nevernote.signals.IndexSignal;\r
 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
 import cx.fbn.nevernote.signals.NoteSignal;\r
 import cx.fbn.nevernote.sql.DatabaseConnection;\r
@@ -67,24 +68,32 @@ public class IndexRunner extends QObject implements Runnable {
        public volatile NoteSignal                      noteSignal;\r
        public volatile NoteResourceSignal      resourceSignal;\r
        private int                                                     indexType;\r
-       public final int                                        CONTENT=1; \r
-       public final int                                        RESOURCE=2;\r
+       public final int                                        SCAN=1; \r
+       public final int                                        REINDEXALL=2;\r
+       public final int                                        REINDEXNOTE=3;\r
        public boolean                                          keepRunning;\r
        private final QDomDocument                      doc;\r
        private static String                           regex = Global.getWordRegex();\r
+       public String                                           specialIndexCharacters = "";\r
+       public boolean                                          indexNoteBody = true;\r
+       public boolean                                          indexNoteTitle = true;\r
+       public boolean                                          indexImageRecognition = true;\r
        private final DatabaseConnection        conn;\r
        private volatile LinkedBlockingQueue<String> workQueue;\r
        private static int MAX_QUEUED_WAITING = 1000;\r
+       public boolean interrupt;\r
+       public boolean idle;\r
+       public boolean indexAttachmentsLocally = true;\r
+       public volatile IndexSignal                     signal;\r
+       private final TreeSet<String>           foundWords;\r
+       int uncommittedCount = 0;\r
 \r
        \r
-\r
-       \r
-       public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
+       public IndexRunner(String logname, String u, String i, String r, String uid, String pswd, String cpswd) {\r
+               foundWords = new TreeSet<String>();\r
                logger = new ApplicationLogger(logname);\r
-               conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
-               noteSignal = new NoteSignal();\r
-               resourceSignal = new NoteResourceSignal();\r
-               indexType = CONTENT;\r
+               conn = new DatabaseConnection(logger, u, i, r, uid, pswd, cpswd, 500);\r
+               indexType = SCAN;\r
                guid = null;\r
                keepRunning = true;\r
                doc = new QDomDocument();\r
@@ -99,88 +108,131 @@ public class IndexRunner extends QObject implements Runnable {
        @Override\r
        public void run() {\r
                thread().setPriority(Thread.MIN_PRIORITY);\r
+               noteSignal = new NoteSignal();\r
+               resourceSignal = new NoteResourceSignal();\r
+               signal = new IndexSignal();\r
                logger.log(logger.EXTREME, "Starting index thread ");\r
                while (keepRunning) {\r
+                       idle=true;\r
                        try {\r
+                               conn.commitTransaction();\r
+                               uncommittedCount = 0;\r
                                String work = workQueue.take();\r
-                               if (work.startsWith("CONTENT")) {\r
-                                       work = work.replace("CONTENT ", "");\r
-                                       guid = work;\r
-                                       indexType = CONTENT;\r
+                               idle=false;\r
+                               if (work.startsWith("SCAN")) {\r
+                                       guid=null;\r
+                                       interrupt = false;\r
+                                       indexType = SCAN;\r
+                               }\r
+                               if (work.startsWith("REINDEXALL")) {\r
+                                       guid = null;\r
+                                       indexType=REINDEXALL;\r
                                }\r
-                               if (work.startsWith("RESOURCE")) {\r
-                                       work = work.replace("RESOURCE ", "");\r
+                               if (work.startsWith("REINDEXNOTE")) {\r
+                                       work = work.replace("REINDEXNOTE ", "");\r
                                        guid = work;\r
-                                       indexType = RESOURCE;\r
+                                       indexType = REINDEXNOTE;\r
                                }\r
                                if (work.startsWith("STOP")) {\r
                                        keepRunning = false;\r
-                                       guid = work;\r
-                               }\r
-                               if (guid == null || guid.trim().equals("")) {\r
-                                       setIndexType(0);\r
-                                       resourceSignal.resourceIndexed.emit("null or empty guid");\r
+                                       guid = null;\r
                                }\r
                                logger.log(logger.EXTREME, "Type:" +indexType);\r
-                               if (indexType == CONTENT && keepRunning) {\r
-                                       logger.log(logger.MEDIUM, "Indexing note: "+guid);\r
-                                       indexNoteContent();\r
+                               if (indexType == SCAN && keepRunning) {\r
+                                       logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
+                                       scanUnindexed();\r
                                        setIndexType(0);\r
                                }\r
-                               if (indexType == RESOURCE && keepRunning) {\r
-                                       logger.log(logger.MEDIUM, "Indexing resource: "+guid);\r
-                                       indexResource();\r
+                               if (indexType == REINDEXALL && keepRunning) {\r
+                                       logger.log(logger.MEDIUM, "Marking all for reindex");\r
+                                       reindexAll();\r
                                        setIndexType(0);\r
                                }\r
+                               if (indexType == REINDEXNOTE && keepRunning) {\r
+                                       reindexNote();\r
+                               }\r
                        } catch (InterruptedException e) {\r
-                               // TODO Auto-generated catch block\r
-                               e.printStackTrace();\r
+                               logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());\r
                        }\r
                }\r
+               logger.log(logger.EXTREME, "Shutting down database");\r
                conn.dbShutdown();\r
+               logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
        }\r
        \r
        // Reindex a note\r
        public void indexNoteContent() {\r
+               foundWords.clear();\r
+               \r
                logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
                \r
                logger.log(logger.EXTREME, "Getting note content");\r
                Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
-               String data = n.getContent();\r
+               String data;\r
+               if (indexNoteBody) {\r
+                       data = n.getContent();\r
+                       data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
                \r
-               logger.log(logger.EXTREME, "Removing any encrypted data");\r
-               data = removeEnCrypt(data);\r
-               logger.log(logger.EXTREME, "Removing xml markups");\r
-               // These HTML characters need to be replaced by a space, or they'll cause words to jam together\r
-//             data = data.toLowerCase().replace("<br>", " ").replace("<hr>", " ").replace("<p>", " ").replace("<href>", " ");\r
-//             String text = StringEscapeUtils.unescapeHtml(data.replaceAll("\\<.*?\\>", ""));\r
-               Tidy tidy = new Tidy();\r
-               tidy.getStderr().close();  // the listener will capture messages\r
-               tidy.setXmlTags(true);\r
-               byte html[] = data.getBytes();\r
-               ByteArrayInputStream is = new ByteArrayInputStream(html);\r
-               ByteArrayOutputStream os = new ByteArrayOutputStream();\r
-               tidy.parse(is, os);\r
-               String text =  StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", "")) +" "+\r
-               n.getTitle();\r
+                       logger.log(logger.EXTREME, "Removing any encrypted data");\r
+                       data = removeEnCrypt(data.toString());\r
+                       logger.log(logger.EXTREME, "Removing xml markups");\r
+               } else\r
+                       data = "";\r
+               String text;\r
+               if (indexNoteTitle)\r
+                       text =  removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());\r
+               else\r
+                       text = removeTags(StringEscapeUtils.unescapeHtml4(data));\r
                                \r
                logger.log(logger.EXTREME, "Splitting words");\r
                String[] result = text.toString().split(regex);\r
+               conn.commitTransaction();\r
+               conn.beginTransaction();\r
                logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
                conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
                \r
                logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
                for (int j=0; j<result.length && keepRunning; j++) {\r
-                       logger.log(logger.EXTREME, "Result word: " +result[j]);\r
-                       addToIndex(guid, result[j], "CONTENT");\r
+                       if (interrupt) {\r
+                               processInterrupt();\r
+                       }\r
+                       if (!result[j].trim().equals("")) {\r
+                               logger.log(logger.EXTREME, "Result word: " +result[j].trim());\r
+                               addToIndex(guid, result[j], "CONTENT");\r
+                       }\r
+               }\r
+               \r
+               // Add tags\r
+               for (int j=0; j<n.getTagNamesSize(); j++) {\r
+                       if (n.getTagNames() != null && n.getTagNames().get(j) != null && !n.getTagNames().get(j).trim().equals(""))\r
+                               addToIndex(guid, n.getTagNames().get(j), "CONTENT");\r
                }\r
+               \r
                // If we were interrupted, we will reindex this note next time\r
                if (Global.keepRunning) {\r
                        logger.log(logger.EXTREME, "Resetting note guid needed");\r
                        conn.getNoteTable().setIndexNeeded(guid, false);\r
-               }\r
+               } \r
+               conn.commitTransaction();\r
+               uncommittedCount = 0;\r
                logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
        }\r
+       \r
+       \r
+       private String removeTags(String text) {\r
+               StringBuffer buffer = new StringBuffer(text);\r
+               boolean inTag = false;\r
+               for (int i=buffer.length()-1; i>=0; i--) {\r
+                       if (buffer.charAt(i) == '>')\r
+                               inTag = true;\r
+                       if (buffer.charAt(i) == '<')\r
+                               inTag = false;\r
+                       if (inTag || buffer.charAt(i) == '<')\r
+                               buffer.deleteCharAt(i);\r
+               }\r
+               \r
+               return buffer.toString();\r
+       }\r
 \r
        \r
        public synchronized boolean addWork(String request) {\r
@@ -199,16 +251,24 @@ public class IndexRunner extends QObject implements Runnable {
                \r
                if (guid == null)\r
                        return;\r
-               \r
+               foundWords.clear();\r
                Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
-               if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
+               if (!indexImageRecognition || \r
+                               r == null || r.getRecognition() == null || \r
+                               r.getRecognition().getBody() == null || \r
+                               r.getRecognition().getBody().length == 0) \r
                        resourceBinary = new QByteArray(" ");\r
                else\r
                        resourceBinary = new QByteArray(r.getRecognition().getBody());\r
                \r
+               conn.commitTransaction();\r
+               conn.beginTransaction();\r
                conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
                // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
                conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
+               conn.commitTransaction();\r
+               uncommittedCount = 0;\r
+               conn.beginTransaction();\r
                        \r
                doc.setContent(resourceBinary);\r
                QDomElement docElem = doc.documentElement();\r
@@ -216,55 +276,84 @@ public class IndexRunner extends QObject implements Runnable {
                // look for text tags\r
                QDomNodeList anchors = docElem.elementsByTagName("t");\r
                for (int i=0; i<anchors.length() && keepRunning; i++) {\r
+                       if (interrupt) {\r
+                               if (interrupt) {\r
+                                       processInterrupt();\r
+                               }\r
+                       }\r
                        QDomElement enmedia = anchors.at(i).toElement();\r
                        String weight = new String(enmedia.attribute("w"));\r
                        String text = new String(enmedia.text()).toLowerCase();\r
                        if (!text.equals("")) {\r
                                conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
+                               uncommittedCount++;\r
+                               if (uncommittedCount > 100) {\r
+                                       conn.commitTransaction();\r
+                                       uncommittedCount=0;\r
+                               }\r
                        }\r
                }\r
                \r
-               if (Global.keepRunning) {\r
+               if (Global.keepRunning && indexAttachmentsLocally) {\r
+                       conn.commitTransaction();\r
+                       uncommittedCount = 0;\r
+                       conn.beginTransaction();\r
                        indexResourceContent(guid);\r
                }\r
-               \r
+                               \r
                if (Global.keepRunning)\r
                        conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
+               conn.commitTransaction();\r
+               uncommittedCount = 0;\r
        }\r
        \r
        private void indexResourceContent(String guid) {\r
                Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
-               if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
-                       indexResourcePDF(r);\r
-                       return;\r
-               }\r
-               if (r.getMime().equalsIgnoreCase("application/docx") || \r
-                       r.getMime().equalsIgnoreCase("application/xlsx") || \r
-                       r.getMime().equalsIgnoreCase("application/pptx")) {\r
-                       indexResourceOOXML(r);\r
-                       return;\r
-               }\r
-               if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
-                       r.getMime().equalsIgnoreCase("application/ppt") ||\r
-                       r.getMime().equalsIgnoreCase("application/xls") ||\r
-                       r.getMime().equalsIgnoreCase("application/msg") ||\r
-                       r.getMime().equalsIgnoreCase("application/doc")) {\r
+               if (r != null && r.getMime() != null) {\r
+                       if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
+                               indexResourcePDF(r);\r
+                               return;\r
+                       }\r
+                       if (r.getMime().equalsIgnoreCase("application/docx") || \r
+                               r.getMime().equalsIgnoreCase("application/xlsx") || \r
+                               r.getMime().equalsIgnoreCase("application/pptx")) {\r
+                               indexResourceOOXML(r);\r
+                               return;\r
+                       }\r
+                       if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
+                                       r.getMime().equalsIgnoreCase("application/ppt") ||\r
+                                       r.getMime().equalsIgnoreCase("application/xls") ||\r
+                                       r.getMime().equalsIgnoreCase("application/msg") ||\r
+                                       r.getMime().equalsIgnoreCase("application/doc")) {\r
                                indexResourceOffice(r);\r
                                return;\r
-               }\r
-               if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
+                       }\r
+                       if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
                                        indexResourceRTF(r);\r
                                        return;\r
-               }\r
-               if (r.getMime().equalsIgnoreCase("application/odf")) {\r
-                       indexResourceODF(r);\r
-                       return;\r
+                       }\r
+                       if (r.getMime().equalsIgnoreCase("application/odf") ||\r
+                               r.getMime().equalsIgnoreCase("application/odt") ||\r
+                               r.getMime().equalsIgnoreCase("application/odp") ||\r
+                               r.getMime().equalsIgnoreCase("application/odg") ||\r
+                               r.getMime().equalsIgnoreCase("application/odb") ||\r
+                               r.getMime().equalsIgnoreCase("application/ods")) {\r
+                               indexResourceODF(r);\r
+                               return;\r
+                       }\r
                }\r
        }\r
 \r
 \r
        private void indexResourceRTF(Resource r) {\r
-               QTemporaryFile f = writeResource(r.getData());\r
+\r
+               Data d = r.getData();\r
+               for (int i=0; i<20 && d.getSize() == 0; i++)\r
+                       d = r.getData();\r
+               if (d.getSize()== 0)\r
+                       return;\r
+\r
+               QTemporaryFile f = writeResource(d);\r
                if (!keepRunning) {\r
                        return;\r
                }\r
@@ -272,7 +361,7 @@ public class IndexRunner extends QObject implements Runnable {
                InputStream input;\r
                try {\r
                        input = new FileInputStream(new File(f.fileName()));\r
-                       ContentHandler textHandler = new BodyContentHandler();\r
+                       ContentHandler textHandler = new BodyContentHandler(-1);\r
                        Metadata metadata = new Metadata();\r
                        RTFParser parser = new RTFParser();     \r
                        ParseContext context = new ParseContext();\r
@@ -287,23 +376,31 @@ public class IndexRunner extends QObject implements Runnable {
                } catch (java.lang.ClassCastException e) {\r
                        logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
                } catch (FileNotFoundException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
                } catch (IOException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
                } catch (SAXException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
                } catch (TikaException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
+               } catch (Exception e) {\r
+                       logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
+               } catch (java.lang.NoSuchMethodError e) {\r
+                       logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
+               } catch (Error e) {\r
+                       logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
                }\r
        }\r
 \r
        \r
        private void indexResourceODF(Resource r) {\r
-               QTemporaryFile f = writeResource(r.getData());\r
+\r
+               Data d = r.getData();\r
+               for (int i=0; i<20 && d.getSize() == 0; i++)\r
+                       d = r.getData();\r
+               if (d.getSize()== 0)\r
+                       return;\r
+               QTemporaryFile f = writeResource(d);\r
                if (!keepRunning) {\r
                        return;\r
                }\r
@@ -311,13 +408,16 @@ public class IndexRunner extends QObject implements Runnable {
                InputStream input;\r
                try {\r
                        input = new FileInputStream(new File(f.fileName()));\r
-                       ContentHandler textHandler = new BodyContentHandler();\r
+                       ContentHandler textHandler = new BodyContentHandler(-1);\r
                        Metadata metadata = new Metadata();\r
-                       OpenDocumentContentParser parser = new OpenDocumentContentParser();     \r
+                       OpenDocumentParser parser = new OpenDocumentParser();   \r
                        ParseContext context = new ParseContext();\r
                        parser.parse(input, textHandler, metadata, context);\r
                        String[] result = textHandler.toString().split(regex);\r
                        for (int i=0; i<result.length && keepRunning; i++) {\r
+                               if (interrupt) {\r
+                                       processInterrupt();\r
+                               }\r
                                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
                        }\r
                        input.close();\r
@@ -326,23 +426,31 @@ public class IndexRunner extends QObject implements Runnable {
                } catch (java.lang.ClassCastException e) {\r
                        logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
                } catch (FileNotFoundException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
                } catch (IOException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
                } catch (SAXException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
                } catch (TikaException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
+               } catch (Exception e) {\r
+                       logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
+               } catch (java.lang.NoSuchMethodError e) {\r
+                       logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
+               } catch (Error e) {\r
+                       logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
                }\r
        }\r
 \r
        \r
        private void indexResourceOffice(Resource r) {\r
-               QTemporaryFile f = writeResource(r.getData());\r
+\r
+               Data d = r.getData();\r
+               for (int i=0; i<20 && d.getSize() == 0; i++)\r
+                       d = r.getData();\r
+               if (d.getSize()== 0)\r
+                       return;\r
+               QTemporaryFile f = writeResource(d);\r
                if (!keepRunning) {\r
                        return;\r
                }\r
@@ -350,13 +458,16 @@ public class IndexRunner extends QObject implements Runnable {
                InputStream input;\r
                try {\r
                        input = new FileInputStream(new File(f.fileName()));\r
-                       ContentHandler textHandler = new BodyContentHandler();\r
+                       ContentHandler textHandler = new BodyContentHandler(-1);\r
                        Metadata metadata = new Metadata();\r
                        OfficeParser parser = new OfficeParser();       \r
                        ParseContext context = new ParseContext();\r
                        parser.parse(input, textHandler, metadata, context);\r
                        String[] result = textHandler.toString().split(regex);\r
                        for (int i=0; i<result.length && keepRunning; i++) {\r
+                               if (interrupt) {\r
+                                       processInterrupt();\r
+                               }\r
                                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
                        }\r
                        input.close();\r
@@ -365,38 +476,49 @@ public class IndexRunner extends QObject implements Runnable {
                } catch (java.lang.ClassCastException e) {\r
                        logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
                } catch (FileNotFoundException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
                } catch (IOException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
                } catch (SAXException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
                } catch (TikaException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
+               } catch (Exception e) {\r
+                       logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
+               } catch (java.lang.NoSuchMethodError e) {\r
+                       logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
+               } catch (Error e) {\r
+                       logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
                }\r
        }\r
 \r
        \r
        \r
        private void indexResourcePDF(Resource r) {\r
-               QTemporaryFile f = writeResource(r.getData());\r
+\r
+               Data d = r.getData();\r
+               for (int i=0; i<20 && d.getSize() == 0; i++)\r
+                       d = r.getData();\r
+               if (d.getSize()== 0)\r
+                       return;\r
+               QTemporaryFile f = writeResource(d);\r
                if (!keepRunning) {\r
                        return;\r
                }\r
                \r
                InputStream input;\r
-               try {\r
+               try {                   \r
                        input = new FileInputStream(new File(f.fileName()));\r
-                       ContentHandler textHandler = new BodyContentHandler();\r
+                       ContentHandler textHandler = new BodyContentHandler(-1);\r
                        Metadata metadata = new Metadata();\r
                        PDFParser parser = new PDFParser();     \r
                        ParseContext context = new ParseContext();\r
                        parser.parse(input, textHandler, metadata, context);\r
                        String[] result = textHandler.toString().split(regex);\r
                        for (int i=0; i<result.length && keepRunning; i++) {\r
+                               if (interrupt) {\r
+                                       processInterrupt();\r
+                               }\r
                                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
                        }\r
                        input.close();\r
@@ -405,23 +527,31 @@ public class IndexRunner extends QObject implements Runnable {
                } catch (java.lang.ClassCastException e) {\r
                        logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
                } catch (FileNotFoundException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
                } catch (IOException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
                } catch (SAXException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
                } catch (TikaException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
+               } catch (Exception e) {\r
+                       logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
+               } catch (java.lang.NoSuchMethodError e) {\r
+                       logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
+               } catch (Error e) {\r
+                       logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
                }\r
        }\r
        \r
        \r
        private void indexResourceOOXML(Resource r) {\r
-               QTemporaryFile f = writeResource(r.getData());\r
+\r
+               Data d = r.getData();\r
+               for (int i=0; i<20 && d.getSize() == 0; i++)\r
+                       d = r.getData();\r
+               if (d.getSize()== 0)\r
+                       return;\r
+               QTemporaryFile f = writeResource(d);\r
                if (!keepRunning) {\r
                        return;\r
                }\r
@@ -429,13 +559,16 @@ public class IndexRunner extends QObject implements Runnable {
                InputStream input;\r
                try {\r
                        input = new FileInputStream(new File(f.fileName()));\r
-                       ContentHandler textHandler = new BodyContentHandler();\r
+                       ContentHandler textHandler = new BodyContentHandler(-1);\r
                        Metadata metadata = new Metadata();\r
                        OOXMLParser parser = new OOXMLParser(); \r
                        ParseContext context = new ParseContext();\r
                        parser.parse(input, textHandler, metadata, context);\r
                        String[] result = textHandler.toString().split(regex);\r
                        for (int i=0; i<result.length && keepRunning; i++) {\r
+                               if (interrupt) {\r
+                                       processInterrupt();\r
+                               }\r
                                addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
                        }\r
                        input.close();\r
@@ -444,18 +577,19 @@ public class IndexRunner extends QObject implements Runnable {
                } catch (java.lang.ClassCastException e) {\r
                        logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
                } catch (FileNotFoundException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
                } catch (IOException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
                } catch (SAXException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
+                       logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
                } catch (TikaException e) {\r
-                       // TODO Auto-generated catch block\r
-                       e.printStackTrace();\r
-               }\r
+                       logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
+               } catch (Exception e) {\r
+                       logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
+               } catch (java.lang.NoSuchMethodError e) {\r
+                       logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
+               } catch (Error e) {\r
+                       logger.log(logger.LOW, "Unknown error: " +e.getMessage());              }\r
        }\r
        \r
 \r
@@ -466,7 +600,7 @@ public class IndexRunner extends QObject implements Runnable {
                newFile.write(d.getBody());\r
                newFile.close();\r
                return newFile;\r
-       }\r
+       } \r
 \r
        \r
        private String removeEnCrypt(String content) {\r
@@ -474,6 +608,9 @@ public class IndexRunner extends QObject implements Runnable {
                int endPos;\r
                boolean tagFound = true;\r
                while (tagFound && keepRunning) {\r
+                       if (interrupt) {\r
+                               processInterrupt();\r
+                       }\r
                        endPos = content.indexOf("</en-crypt>", index)+11;\r
                        if (endPos > -1 && index > -1) {\r
                                content = content.substring(0,index)+content.substring(endPos);\r
@@ -487,33 +624,116 @@ public class IndexRunner extends QObject implements Runnable {
 \r
        \r
        private void addToIndex(String guid, String word, String type) {\r
                // Normalizes one candidate word and, if it has not been seen before,\r
                // stores it in the words table for the given guid.\r
                //   guid - passed through to addWordToNoteIndex\r
                //   word - raw token supplied by the caller\r
                //   type - passed through unchanged (visible callers use "RESOURCE")\r
                // NOTE(review): the removed implementation required\r
                // buffer.length() >= Global.minimumWordCount; the replacement only\r
                // requires a non-empty word - confirm that this is intended.\r
-               if (word.length() > 0) {\r
                // Raw tokens already present in foundWords are skipped outright.\r
+               if (foundWords.contains(word))\r
+                       return;\r
                // Lower-case the token, then strip trailing characters that are neither\r
                // letters/digits nor listed in specialIndexCharacters.\r
+               StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
+               for (int i=buffer.length()-1; i>=0; i--) {\r
+                       if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)\r
+                               buffer.deleteCharAt(i);\r
+                       else\r
+                               break;\r
+               }\r
                // Reverse so the original leading characters sit at the end, and strip\r
                // every non letter/digit there. Unlike the trailing pass above, this pass\r
                // does not exempt specialIndexCharacters.\r
+               buffer = buffer.reverse();\r
+               for (int i=buffer.length()-1; i>=0; i--) {\r
+                       if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
+                               buffer.deleteCharAt(i);\r
+                       else\r
+                               break;\r
+               }\r
                // Restore the original character order.\r
+               buffer = buffer.reverse();\r
+               if (buffer.length() > 0) {\r
                        // We have a good word, now let's trim off junk at the beginning or end\r
-                       StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
-                       for (int x = buffer.length()-1; x>=0; x--) {\r
-                               if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
-                                       buffer = buffer.deleteCharAt(x);\r
-                               else\r
-                                       x=-1;\r
                        // Only index the cleaned word once; both the cleaned and the raw form\r
                        // are remembered in foundWords so later duplicates exit early.\r
+                       if (!foundWords.contains(buffer.toString())) {\r
+                               foundWords.add(buffer.toString());\r
+                               foundWords.add(word);\r
                                // The last argument (100) is passed as-is; its meaning is not\r
                                // visible in this part of the file.\r
+                               conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
                                // Commit once more than 100 words have been added since the\r
                                // last commit, then restart the count.\r
                                // NOTE(review): processInterrupt() calls beginTransaction() after\r
                                // committing; no such call is made here - confirm whether one is needed.\r
+                               uncommittedCount++;\r
+                               if (uncommittedCount > 100) {\r
+                                       conn.commitTransaction();\r
+                                       uncommittedCount=0;\r
+                               }\r
                        }\r
-                       // Things have been trimmed off the end, so reverse the string & repeat.\r
-                       buffer = buffer.reverse();\r
-                       for (int x = buffer.length()-1; x>=0; x--) {\r
-                               if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
-                                       buffer = buffer.deleteCharAt(x);\r
-                               else\r
-                                       x=-1;\r
+               }\r
+               return;\r
+       }\r
+       \r
+       private void scanUnindexed() {\r
                // Indexes every note and resource reported as unindexed, then removes\r
                // word-index entries whose note no longer exists. indexStarted is emitted\r
                // at most once, when there is work to do; indexFinished is emitted only\r
                // if work was started and keepRunning is still true at the end.\r
+               List<String> notes = conn.getNoteTable().getUnindexed();\r
                // guid is not a local variable; indexNoteContent() and indexResource()\r
                // take no arguments, so they presumably read it - TODO confirm.\r
+               guid = null;\r
+               boolean started = false;\r
+               if (notes.size() > 0) {\r
+                       signal.indexStarted.emit();\r
+                       started = true;\r
+               }\r
                // Pass 1: note content. A pending interrupt is serviced before each note.\r
+               for (int i=0; i<notes.size() && keepRunning; i++) {\r
+                       if (interrupt) {\r
+                               processInterrupt();\r
+                       }\r
+                       guid = notes.get(i);\r
+                       if (guid != null && keepRunning) {\r
+                               indexNoteContent();\r
                        }\r
-                       // Restore the string back to the proper order.\r
-                       buffer = buffer.reverse();\r
+               }\r
                \r
-                       logger.log(logger.EXTREME, "Processing " +buffer);\r
-                       if (buffer.length()>=Global.minimumWordCount) {\r
-                               logger.log(logger.EXTREME, "Adding " +buffer);\r
-                               conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
                // Pass 2: resources. Unlike pass 1, guid is not null-checked here.\r
+               List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
+               if (unindexedResources.size() > 0 && !started) {\r
+                       signal.indexStarted.emit();\r
+                       started = true;\r
+               }\r
+               for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {\r
+                       if (interrupt) {\r
+                               processInterrupt();\r
+                       }\r
+                       guid = unindexedResources.get(i);\r
+                       if (keepRunning) {\r
+                               indexResource();\r
                        }\r
                }\r
+               \r
+               // Cleanup stuff that was deleted at some point\r
                // Every GUID held by the words table is checked against the note table;\r
                // entries without a matching note are expunged from the words table.\r
+               List<String> guids = conn.getWordsTable().getGuidList();\r
+               logger.log(logger.LOW, "GUIDS in index: " +guids.size());\r
+               for (int i=0; i<guids.size() && keepRunning; i++) {\r
+                       if (!conn.getNoteTable().exists(guids.get(i))) {\r
+                               logger.log(logger.LOW, "Old GUID found: " +guids.get(i));\r
+                               conn.getWordsTable().expunge(guids.get(i));\r
+                       }\r
+               }\r
+               \r
+               if (started && keepRunning) \r
+                       signal.indexFinished.emit();\r
+       }\r
+       \r
+       private void reindexNote() {\r
                // Flags the note identified by the current guid as needing indexing.\r
                // Does nothing when no guid is set.\r
+               if (guid == null)\r
+                       return;\r
+               conn.getNoteTable().setIndexNeeded(guid, true);\r
        }\r
        \r
+       private void reindexAll() {\r
                // Asks the note table, then its resource table, to reindex everything.\r
                // What those calls do internally is not visible in this file.\r
+               conn.getNoteTable().reindexAllNotes();\r
+               conn.getNoteTable().noteResourceTable.reindexAll(); \r
+       }\r
 \r
+       /**\r
+        * Pauses the calling thread for roughly the requested number of seconds.\r
+        *\r
+        * @param len number of seconds to wait; zero or negative returns immediately\r
+        */\r
+       private void waitSeconds(int len) {\r
+               // Honor the requested duration: previously the argument was ignored and\r
+               // the wait was always one second. Multiplying by long constants keeps\r
+               // the arithmetic out of int range, so large values cannot overflow.\r
+               long nanos = Math.max(0L, len * 1000L) * 1000000L;\r
+               // parkNanos is used instead of wait(time) for its finer timer accuracy.\r
+               // It may return early (unpark, thread interrupt, or spuriously); as in\r
+               // the original single call, no attempt is made to re-park.\r
+               LockSupport.parkNanos(nanos);\r
+       }\r
+       \r
+       private void processInterrupt() {\r
                // Services a pending interrupt request. The order matters: commit the\r
                // open transaction, pause via waitSeconds(1), reset the uncommitted-word\r
                // counter, open a fresh transaction, and only then clear the flag.\r
                // Where the interrupt flag gets set is not visible in this part of the file.\r
+               conn.commitTransaction();\r
+               waitSeconds(1);\r
+               uncommittedCount = 0;\r
+               conn.beginTransaction();\r
+               interrupt = false;\r
+       }\r
+       \r
 }\r