OSDN Git Service

Fix non-latin character search problem & remove minimum word length options.
author Randy Baumgarte <randy@fbn.cx>
Sat, 18 Dec 2010 18:49:53 +0000 (13:49 -0500)
committer Randy Baumgarte <randy@fbn.cx>
Sun, 19 Dec 2010 04:09:43 +0000 (23:09 -0500)
src/cx/fbn/nevernote/Global.java
src/cx/fbn/nevernote/NeverNote.java
src/cx/fbn/nevernote/dialog/ConfigDialog.java
src/cx/fbn/nevernote/dialog/ConfigIndexPage.java
src/cx/fbn/nevernote/filters/EnSearch.java
src/cx/fbn/nevernote/sql/NoteTable.java
src/cx/fbn/nevernote/sql/REnSearch.java
src/cx/fbn/nevernote/threads/IndexRunner.java
src/cx/fbn/nevernote/utilities/ListManager.java

index 05887c2..829bdea 100644 (file)
@@ -544,28 +544,6 @@ public class Global {
                        settings.setValue("newNoteWithSelectedTags", "false");\r
                settings.endGroup();\r
     }\r
-    public static void setMinimumWordLength(int len) {\r
-               settings.beginGroup("General");\r
-               settings.setValue("minimumWordLength", len);\r
-               settings.endGroup();            \r
-    }\r
-    public static int getMinimumWordLength() {\r
-               settings.beginGroup("General");\r
-               Integer len = 4;\r
-               try {\r
-                       String val  = (String)settings.value("minimumWordLength", "4");\r
-                       len = new Integer(val);\r
-               } catch (Exception e) {\r
-                       try {\r
-                               len = (Integer)settings.value("minimumWordLength", 4);\r
-                       } catch (Exception e1) {\r
-                               len = 4;\r
-                       }\r
-               }\r
-               settings.endGroup();\r
-               return len;\r
-               \r
-    }\r
     public static void setRecognitionWeight(int len) {\r
                settings.beginGroup("General");\r
                settings.setValue("recognitionWeight", len);\r
index 64af32c..30a200c 100644 (file)
@@ -952,8 +952,12 @@ public class NeverNote extends QMainWindow{
                Global.keepRunning = false;
                try {
                        logger.log(logger.MEDIUM, "Waiting for indexThread to stop");
-                       indexRunner.thread().join(50);
-                       logger.log(logger.MEDIUM, "Index thread has stopped");
+                       if (indexRunner.thread().isAlive())
+                               indexRunner.thread().join(50);
+                       if (!indexRunner.thread().isAlive())
+                               logger.log(logger.MEDIUM, "Index thread has stopped");
+                       else
+                               logger.log(logger.MEDIUM, "Index thread still running - bypassing");
                } catch (InterruptedException e1) {
                        e1.printStackTrace();
                }
index c8baa2d..917a91c 100644 (file)
@@ -162,9 +162,6 @@ public class ConfigDialog extends QDialog {
                Global.userStoreUrl = "https://"+debugPage.getServer()+"/edam/user";\r
                Global.setWordRegex(indexPage.getRegex());\r
                Global.setRecognitionWeight(indexPage.getRecognitionWeight());\r
-               Global.setMinimumWordLength(indexPage.getWordLength());\r
-               Global.minimumWordCount = indexPage.getWordLength();    \r
-               Global.setIndexThreads(indexPage.getIndexThreads());\r
                Global.setIndexThreadSleepInterval(indexPage.getSleepInterval());\r
                Global.setMessageLevel( debugPage.getDebugLevel());\r
                Global.saveCarriageReturnFix(debugPage.getCarriageReturnFix());\r
@@ -307,8 +304,6 @@ public class ConfigDialog extends QDialog {
                appearancePage.setMinimizeOnClose(Global.minimizeOnClose());\r
                \r
                indexPage.setRegex(Global.getWordRegex());\r
-               indexPage.setWordLength(Global.getMinimumWordLength());\r
-               indexPage.setIndexThreads(Global.getIndexThreads());\r
                indexPage.setSleepInterval(Global.getIndexThreadSleepInterval());\r
                connectionPage.setSyncInterval(Global.getSyncInterval());\r
                \r
index 7d9b81a..e3da0b1 100644 (file)
@@ -32,8 +32,6 @@ import cx.fbn.nevernote.Global;
 \r
 public class ConfigIndexPage extends QWidget {\r
 \r
-       private final QSpinBox  indexThreadSpinner;\r
-       private final QSpinBox lengthSpinner;\r
        private final QSpinBox weightSpinner;\r
        private final QSpinBox sleepSpinner;\r
        private final QCheckBox indexAttachmentsLocally;\r
@@ -41,35 +39,7 @@ public class ConfigIndexPage extends QWidget {
        \r
        public ConfigIndexPage(QWidget parent) {\r
 //             super(parent);\r
-               \r
-               indexThreadSpinner = new QSpinBox(this);\r
-               indexThreadSpinner.setMaximum(5);\r
-               indexThreadSpinner.setMinimum(1);\r
-                       \r
-               // Index threads layout\r
-               QLabel threadLabel = new QLabel(tr("Maximum Threads"));\r
-               QHBoxLayout threadsLayout = new QHBoxLayout();\r
-               threadsLayout.addWidget(threadLabel);\r
-               threadsLayout.addWidget(indexThreadSpinner);\r
-               QGroupBox threadsGroup = new QGroupBox(tr("Indexing Threads (Requires Restart)"));\r
-               threadsGroup.setLayout(threadsLayout);\r
-               \r
-               threadsGroup.setVisible(false);\r
-               \r
-               \r
-               // Minimum word length\r
-               QGroupBox wordLengthGroup = new QGroupBox(tr("Word Length"));\r
-               QLabel wordLengthLabel = new QLabel(tr("Minimum Word Length"));\r
-               lengthSpinner = new QSpinBox();\r
-               lengthSpinner.setRange(1,10);\r
-               lengthSpinner.setSingleStep(1);\r
-               lengthSpinner.setValue(Global.minimumWordCount);\r
-               \r
-               QHBoxLayout wordLengthLayout = new QHBoxLayout();\r
-               wordLengthLayout.addWidget(wordLengthLabel);\r
-               wordLengthLayout.addWidget(lengthSpinner);\r
-               wordLengthGroup.setLayout(wordLengthLayout);\r
-               \r
+                                                       \r
                // Recognition weight\r
                QGroupBox weightGroup = new QGroupBox(tr("Recognition"));\r
                QLabel weightLabel = new QLabel(tr("Minimum Recognition Weight"));\r
@@ -118,8 +88,6 @@ public class ConfigIndexPage extends QWidget {
                \r
                \r
                QVBoxLayout mainLayout = new QVBoxLayout();\r
-               mainLayout.addWidget(threadsGroup);\r
-               mainLayout.addWidget(wordLengthGroup);\r
                mainLayout.addWidget(sleepGroup);\r
                mainLayout.addWidget(weightGroup);\r
                mainLayout.addWidget(attachmentGroup);\r
@@ -130,15 +98,6 @@ public class ConfigIndexPage extends QWidget {
 \r
        }\r
        \r
-       //*****************************************\r
-       //* Word length get/set methods \r
-       //*****************************************\r
-       public void setWordLength(int len) {\r
-               lengthSpinner.setValue(len);\r
-       }\r
-       public int getWordLength() {\r
-               return lengthSpinner.value();\r
-       }\r
        \r
        //*****************************************\r
        //* Get for flag to index attachments \r
@@ -169,16 +128,6 @@ public class ConfigIndexPage extends QWidget {
                return weightSpinner.value();\r
        }\r
        \r
-       //*****************************************\r
-       //* Index Threads get/set methods\r
-       //*****************************************\r
-       public void setIndexThreads(int value) {\r
-               indexThreadSpinner.setValue(value);\r
-       }\r
-       public int getIndexThreads() {\r
-               return indexThreadSpinner.value();\r
-       }\r
-\r
        \r
        \r
        //*****************************************\r
index 0a91692..e4c7ab3 100644 (file)
@@ -35,14 +35,14 @@ public class EnSearch {
        private List<Note>                      matches;\r
        public List<String>                     hilightWords;\r
        \r
-       public EnSearch(DatabaseConnection conn, ApplicationLogger logger, String s, List<Tag> t, int len, int weight) {\r
+       public EnSearch(DatabaseConnection conn, ApplicationLogger logger, String s, List<Tag> t, int weight) {\r
                if (s == null) \r
                        return;\r
                if (s.trim().equals(""))\r
                        return;\r
                \r
                matches = null;\r
-               REnSearch request = new REnSearch(conn, logger, s, t, len, weight);\r
+               REnSearch request = new REnSearch(conn, logger, s, t, weight);\r
                matches = request.matchWords();\r
                hilightWords = request.getWords();\r
        }\r
index 38948cc..446d1a6 100644 (file)
@@ -206,6 +206,15 @@ public class NoteTable {
                query.next();\r
                return query.valueString(0);\r
        }\r
+       // Get a note's content in blob format for index.\r
+       public String getNoteContentNoUTFConversion(String guid) {\r
+               NSqlQuery query = new NSqlQuery(db.getConnection());\r
+               query.prepare("Select content from note where guid=:guid");\r
+               query.bindValue(":guid", guid);\r
+               query.exec();           \r
+               query.next();\r
+               return query.valueString(0);\r
+       }\r
        // Get a note by Guid\r
        public Note getNote(String noteGuid, boolean loadContent, boolean loadResources, boolean loadRecognition, boolean loadBinary, boolean loadTags) {\r
                if (noteGuid == null)\r
index c6502b7..ecba8b6 100644 (file)
@@ -59,15 +59,13 @@ public class REnSearch {
        private final ApplicationLogger logger;\r
 //     private final DatabaseConnection db;\r
        private boolean any;\r
-       private int     minimumWordLength = 3;\r
        private int minimumRecognitionWeight = 80;\r
        private final DatabaseConnection conn;\r
        \r
-       public REnSearch(DatabaseConnection c, ApplicationLogger l, String s, List<Tag> t, int m, int r) {\r
+       public REnSearch(DatabaseConnection c, ApplicationLogger l, String s, List<Tag> t, int r) {\r
                logger = l;\r
                conn = c;\r
                tagIndex = t;\r
-               minimumWordLength = m;\r
                minimumRecognitionWeight = r;\r
                searchWords = new ArrayList<String>();\r
                searchPhrases = new ArrayList<String>();\r
@@ -360,7 +358,6 @@ public class REnSearch {
        // subject date\r
 \r
        private void parseTerms(List<String> words) {\r
-               int minLen = minimumWordLength;\r
                \r
                for (int i=0; i<words.size(); i++) {\r
                        String word = words.get(i);\r
@@ -375,8 +372,8 @@ public class REnSearch {
                                searchPhrase=true;\r
                                searchPhrases.add(word.toLowerCase());\r
                        }\r
-                       if (!searchPhrase && pos < 0 && (word.length() >= minLen || word.indexOf('*')>=0)\r
-                               getWords().add(word);\r
+                       if (!searchPhrase && pos < 0) \r
+                               getWords().add("*"+word+"*");\r
                        if (word.startsWith("intitle:")) \r
                                intitle.add("*"+word+"*");\r
                        if (word.startsWith("-intitle:")) \r
index 208d940..a94b597 100644 (file)
@@ -19,8 +19,6 @@
 \r
 package cx.fbn.nevernote.threads;\r
 \r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
 import java.io.File;\r
 import java.io.FileInputStream;\r
 import java.io.FileNotFoundException;\r
@@ -39,7 +37,6 @@ import org.apache.tika.parser.odf.OpenDocumentParser;
 import org.apache.tika.parser.pdf.PDFParser;\r
 import org.apache.tika.parser.rtf.RTFParser;\r
 import org.apache.tika.sax.BodyContentHandler;\r
-import org.w3c.tidy.Tidy;\r
 import org.xml.sax.ContentHandler;\r
 import org.xml.sax.SAXException;\r
 \r
@@ -148,32 +145,27 @@ public class IndexRunner extends QObject implements Runnable {
                                e.printStackTrace();\r
                        }\r
                }\r
+               logger.log(logger.EXTREME, "Shutting down database");\r
                conn.dbShutdown();\r
+               logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
        }\r
        \r
        // Reindex a note\r
        public void indexNoteContent() {\r
                \r
-//             if (wordMap.size() > 0)\r
-//                     wordMap.clear();\r
                logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
                \r
                logger.log(logger.EXTREME, "Getting note content");\r
                Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
                String data = n.getContent();\r
+               data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
+               System.out.println(data);\r
                \r
                logger.log(logger.EXTREME, "Removing any encrypted data");\r
-               data = removeEnCrypt(data);\r
+               data = removeEnCrypt(data.toString());\r
                logger.log(logger.EXTREME, "Removing xml markups");\r
-               Tidy tidy = new Tidy();\r
-               tidy.getStderr().close();  // the listener will capture messages\r
-               tidy.setXmlTags(true);\r
-               byte html[] = data.getBytes();\r
-               ByteArrayInputStream is = new ByteArrayInputStream(html);\r
-               ByteArrayOutputStream os = new ByteArrayOutputStream();\r
-               tidy.parse(is, os);\r
-               String text =  StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", "")) +" "+\r
-               n.getTitle();\r
+               String text =  removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
+               n.getTitle());\r
                                \r
                logger.log(logger.EXTREME, "Splitting words");\r
                String[] result = text.toString().split(regex);\r
@@ -182,8 +174,10 @@ public class IndexRunner extends QObject implements Runnable {
                \r
                logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
                for (int j=0; j<result.length && keepRunning; j++) {\r
-                       logger.log(logger.EXTREME, "Result word: " +result[j]);\r
-                       addToIndex(guid, result[j], "CONTENT");\r
+                       if (!result[j].trim().equals("")) {\r
+                               logger.log(logger.EXTREME, "Result word: " +result[j]);\r
+                               addToIndex(guid, result[j], "CONTENT");\r
+                       }\r
                }\r
                // If we were interrupted, we will reindex this note next time\r
                if (Global.keepRunning) {\r
@@ -192,6 +186,22 @@ public class IndexRunner extends QObject implements Runnable {
                }\r
                logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
        }\r
+       \r
+       \r
+       private String removeTags(String text) {\r
+               StringBuffer buffer = new StringBuffer(text);\r
+               boolean inTag = false;\r
+               for (int i=buffer.length()-1; i>=0; i--) {\r
+                       if (buffer.charAt(i) == '>')\r
+                               inTag = true;\r
+                       if (buffer.charAt(i) == '<')\r
+                               inTag = false;\r
+                       if (inTag || buffer.charAt(i) == '<')\r
+                               buffer.deleteCharAt(i);\r
+               }\r
+               \r
+               return buffer.toString();\r
+       }\r
 \r
        \r
        public synchronized boolean addWork(String request) {\r
@@ -517,26 +527,7 @@ public class IndexRunner extends QObject implements Runnable {
                if (word.length() > 0) {\r
                        // We have a good word, now let's trim off junk at the beginning or end\r
                        StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
-                       for (int x = buffer.length()-1; x>=0; x--) {\r
-                               if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
-                                       buffer = buffer.deleteCharAt(x);\r
-                               else\r
-                                       x=-1;\r
-                       }\r
-                       // Things have been trimmed off the end, so reverse the string & repeat.\r
-                       buffer = buffer.reverse();\r
-                       for (int x = buffer.length()-1; x>=0 && keepRunning; x--) {\r
-                               if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
-                                       buffer = buffer.deleteCharAt(x);\r
-                               else\r
-                                       x=-1;\r
-                       }\r
-                       // Restore the string back to the proper order.\r
-                       buffer = buffer.reverse();\r
-               \r
-                       if (buffer.length()>=Global.minimumWordCount) {\r
-                               conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
-                       }\r
+                       conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
                }\r
                return;\r
        }\r
index c8246a0..ae8fb75 100644 (file)
@@ -199,7 +199,6 @@ public class ListManager  {
                        e.printStackTrace();\r
                }\r
 \r
-\r
        }\r
 \r
        //***************************************************************\r
@@ -268,7 +267,7 @@ public class ListManager  {
                // load saved search index\r
                setSavedSearchIndex(conn.getSavedSearchTable().getAll());\r
                // Load search helper utility\r
-               enSearch = new EnSearch(conn,  logger, "", getTagIndex(), Global.getMinimumWordLength(), Global.getRecognitionWeight());\r
+               enSearch = new EnSearch(conn,  logger, "", getTagIndex(), Global.getRecognitionWeight());\r
                logger.log(logger.HIGH, "Building note index");\r
 \r
 //             if (getMasterNoteIndex() == null) { \r
@@ -432,7 +431,7 @@ public class ListManager  {
     //***************************************************************\r
     //***************************************************************\r
        public void setEnSearch(String t) {\r
-               enSearch = new EnSearch(conn,logger, t, getTagIndex(), Global.getMinimumWordLength(), Global.getRecognitionWeight());\r
+               enSearch = new EnSearch(conn,logger, t, getTagIndex(), Global.getRecognitionWeight());\r
                enSearchChanged = true;\r
        }\r
        // Save search tags\r