OSDN Git Service

Add selective sync, clean up debug messages, & correct network sync not disconnecting...
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
index 61158d4..fa59a94 100644 (file)
@@ -19,8 +19,6 @@
 \r
 package cx.fbn.nevernote.threads;\r
 \r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
 import java.io.File;\r
 import java.io.FileInputStream;\r
 import java.io.FileNotFoundException;\r
@@ -39,7 +37,6 @@ import org.apache.tika.parser.odf.OpenDocumentParser;
 import org.apache.tika.parser.pdf.PDFParser;\r
 import org.apache.tika.parser.rtf.RTFParser;\r
 import org.apache.tika.sax.BodyContentHandler;\r
-import org.w3c.tidy.Tidy;\r
 import org.xml.sax.ContentHandler;\r
 import org.xml.sax.SAXException;\r
 \r
@@ -80,6 +77,7 @@ public class IndexRunner extends QObject implements Runnable {
        private static int MAX_QUEUED_WAITING = 1000;\r
        public boolean interrupt;\r
        public boolean idle;\r
+       public boolean indexAttachmentsLocally = true;\r
        public volatile IndexSignal                     signal;\r
 \r
        \r
@@ -147,32 +145,26 @@ public class IndexRunner extends QObject implements Runnable {
                                e.printStackTrace();\r
                        }\r
                }\r
+               logger.log(logger.EXTREME, "Shutting down database");\r
                conn.dbShutdown();\r
+               logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
        }\r
        \r
        // Reindex a note\r
        public void indexNoteContent() {\r
                \r
-//             if (wordMap.size() > 0)\r
-//                     wordMap.clear();\r
                logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
                \r
                logger.log(logger.EXTREME, "Getting note content");\r
                Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
                String data = n.getContent();\r
+               data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
                \r
                logger.log(logger.EXTREME, "Removing any encrypted data");\r
-               data = removeEnCrypt(data);\r
+               data = removeEnCrypt(data.toString());\r
                logger.log(logger.EXTREME, "Removing xml markups");\r
-               Tidy tidy = new Tidy();\r
-               tidy.getStderr().close();  // the listener will capture messages\r
-               tidy.setXmlTags(true);\r
-               byte html[] = data.getBytes();\r
-               ByteArrayInputStream is = new ByteArrayInputStream(html);\r
-               ByteArrayOutputStream os = new ByteArrayOutputStream();\r
-               tidy.parse(is, os);\r
-               String text =  StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", "")) +" "+\r
-               n.getTitle();\r
+               String text =  removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
+               n.getTitle());\r
                                \r
                logger.log(logger.EXTREME, "Splitting words");\r
                String[] result = text.toString().split(regex);\r
@@ -181,8 +173,10 @@ public class IndexRunner extends QObject implements Runnable {
                \r
                logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
                for (int j=0; j<result.length && keepRunning; j++) {\r
-                       logger.log(logger.EXTREME, "Result word: " +result[j]);\r
-                       addToIndex(guid, result[j], "CONTENT");\r
+                       if (!result[j].trim().equals("")) {\r
+                               logger.log(logger.EXTREME, "Result word: " +result[j]);\r
+                               addToIndex(guid, result[j], "CONTENT");\r
+                       }\r
                }\r
                // If we were interrupted, we will reindex this note next time\r
                if (Global.keepRunning) {\r
@@ -191,6 +185,22 @@ public class IndexRunner extends QObject implements Runnable {
                }\r
                logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
        }\r
+       /* Returns text with tags removed: scanning from the last character to the first, a '>' starts a deleted span and the nearest '<' to its left ends it (both brackets are deleted too). */\r
+       /* Consequently a '>' that has no '<' anywhere before it deletes everything back to the start of text, and a '<' that has no '>' after it is deleted on its own. */\r
+       private String removeTags(String text) {\r
+               StringBuffer buffer = new StringBuffer(text);\r
+               boolean inTag = false;\r
+               for (int i=buffer.length()-1; i>=0; i--) {\r
+                       if (buffer.charAt(i) == '>')\r
+                               inTag = true;\r
+                       if (buffer.charAt(i) == '<')\r
+                               inTag = false;\r
+                       if (inTag || buffer.charAt(i) == '<')\r
+                               buffer.deleteCharAt(i);\r
+               }\r
+               \r
+               return buffer.toString();\r
+       }\r
 \r
        \r
        public synchronized boolean addWork(String request) {\r
@@ -234,10 +244,10 @@ public class IndexRunner extends QObject implements Runnable {
                        }\r
                }\r
                \r
-               if (Global.keepRunning) {\r
+               if (Global.keepRunning && indexAttachmentsLocally) {\r
                        indexResourceContent(guid);\r
                }\r
-               \r
+                               \r
                if (Global.keepRunning)\r
                        conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
        }\r
@@ -516,26 +526,7 @@ public class IndexRunner extends QObject implements Runnable {
                if (word.length() > 0) {\r
                        // We have a good word, now let's trim off junk at the beginning or end\r
                        StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
-                       for (int x = buffer.length()-1; x>=0; x--) {\r
-                               if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
-                                       buffer = buffer.deleteCharAt(x);\r
-                               else\r
-                                       x=-1;\r
-                       }\r
-                       // Things have been trimmed off the end, so reverse the string & repeat.\r
-                       buffer = buffer.reverse();\r
-                       for (int x = buffer.length()-1; x>=0 && keepRunning; x--) {\r
-                               if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
-                                       buffer = buffer.deleteCharAt(x);\r
-                               else\r
-                                       x=-1;\r
-                       }\r
-                       // Restore the string back to the proper order.\r
-                       buffer = buffer.reverse();\r
-               \r
-                       if (buffer.length()>=Global.minimumWordCount) {\r
-                               conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
-                       }\r
+                       conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
                }\r
                return;\r
        }\r
@@ -556,11 +547,11 @@ public class IndexRunner extends QObject implements Runnable {
                }\r
                \r
                List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
-               if (notes.size() > 0 && !started) {\r
+               if (unindexedResources.size() > 0 && !started) {\r
                        signal.indexStarted.emit();\r
                        started = true;\r
                }\r
-               for (int i=0; i>unindexedResources.size()&& !interrupt && keepRunning; i++) {\r
+               for (int i=0; i<unindexedResources.size()&& !interrupt && keepRunning; i++) {\r
                        guid = unindexedResources.get(i);\r
                        if (keepRunning) {\r
                                indexResource();\r