OSDN Git Service

uni-gramで日本語全文検索できるように変更。ノートコンテンツとタイトルが対象。日本語全文検索時に日本語がハイライトされない問題を修正。
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / sql / REnSearch.java
index c6502b7..4ee21b7 100644 (file)
@@ -1,5 +1,5 @@
 /*\r
- * This file is part of NeverNote \r
+ * This file is part of NixNote/NeighborNote \r
  * Copyright 2009 Randy Baumgarte\r
  * \r
  * This file may be licensed under the terms of of the\r
@@ -27,12 +27,13 @@ import java.util.GregorianCalendar;
 import java.util.List;\r
 import java.util.regex.Pattern;\r
 \r
-import org.apache.commons.lang.StringEscapeUtils;\r
+import org.apache.commons.lang3.StringEscapeUtils;\r
 \r
 import com.evernote.edam.type.Note;\r
 import com.evernote.edam.type.Notebook;\r
 import com.evernote.edam.type.Tag;\r
 \r
+import cx.fbn.nevernote.Global;\r
 import cx.fbn.nevernote.sql.driver.NSqlQuery;\r
 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
 \r
@@ -55,19 +56,18 @@ public class REnSearch {
        private final List<String>      sourceApplication;\r
        private final List<String>      recoType;\r
        private final List<String>      todo;\r
+       private final List<String>  stack;\r
        private final List<Tag>         tagIndex;\r
        private final ApplicationLogger logger;\r
 //     private final DatabaseConnection db;\r
        private boolean any;\r
-       private int     minimumWordLength = 3;\r
        private int minimumRecognitionWeight = 80;\r
        private final DatabaseConnection conn;\r
        \r
-       public REnSearch(DatabaseConnection c, ApplicationLogger l, String s, List<Tag> t, int m, int r) {\r
+       public REnSearch(DatabaseConnection c, ApplicationLogger l, String s, List<Tag> t, int r) {\r
                logger = l;\r
                conn = c;\r
                tagIndex = t;\r
-               minimumWordLength = m;\r
                minimumRecognitionWeight = r;\r
                searchWords = new ArrayList<String>();\r
                searchPhrases = new ArrayList<String>();\r
@@ -87,6 +87,7 @@ public class REnSearch {
                recoType = new ArrayList<String>();\r
                todo = new ArrayList<String>();\r
                any = false;\r
+               stack = new ArrayList<String>();\r
                \r
                if (s == null) \r
                        return;\r
@@ -112,11 +113,10 @@ public class REnSearch {
        public List<String> getCreated() { return created; }\r
        public List<String> getUpdated() { return updated; }\r
        public List<String> getSubjectDate() { return subjectDate; }\r
-       \r
+       public List<String> getStack() { return stack; }\r
 \r
        // match tag names\r
-       private boolean matchTagsAll(List<String> tagNames) {\r
-               List<String> list = getTags();\r
+       private boolean matchTagsAll(List<String> tagNames, List<String> list) {\r
                                \r
                for (int j=0; j<list.size(); j++) {\r
                        boolean negative = false;\r
@@ -129,32 +129,28 @@ public class REnSearch {
                        \r
                        if (tagNames.size() == 0 && !negative)\r
                                return false;\r
-                       if (tagNames.size() == 0 && negative)\r
-                               return true;\r
                        \r
-                       boolean good = false;\r
-                       for (int i=0; i<tagNames.size() && !good; i++) {                \r
+                       boolean matchFound = false;\r
+                       for (int i=0; i<tagNames.size(); i++) { \r
                                boolean matches = Pattern.matches(filterName.toLowerCase(),tagNames.get(i).toLowerCase());\r
-                               if (matches && !negative)\r
-                                       good = true;\r
-                               if (!matches && negative)\r
-                                       good = true;\r
+                               if (matches)\r
+                                       matchFound = true;\r
                        }\r
-                       if (!good)\r
+                       if (negative) \r
+                               matchFound = !matchFound;\r
+                       if (!matchFound) \r
                                return false;\r
                }\r
                return true;\r
        }\r
        \r
        // match tag names\r
-       private boolean matchTagsAny(List<String> tagNames) {\r
-               List<String> list = getTags();\r
+       private boolean matchTagsAny(List<String> tagNames, List<String> list) {\r
                if (list.size() == 0)\r
                        return true;\r
                \r
                boolean negative = false;               \r
-               boolean found = false;\r
-               \r
+\r
                for (int j=0; j<list.size(); j++) {\r
                        negative = false;\r
                        if (list.get(j).startsWith("-"))\r
@@ -163,21 +159,19 @@ public class REnSearch {
                        String filterName = cleanupWord(list.get(j).substring(pos+1));\r
                        filterName = filterName.replace("*", ".*");   // setup for regular expression pattern match\r
                        \r
-                       if (tagNames.size() == 0)\r
-                               found = false;\r
+                       if (tagNames.size() == 0 && !negative)\r
+                               return false;\r
 \r
                        for (int i=0; i<tagNames.size(); i++) {         \r
                                boolean matches = Pattern.matches(filterName.toLowerCase(),tagNames.get(i).toLowerCase());\r
-                               if (matches)\r
-                                       found = true;\r
+                               if (!matches && !negative)\r
+                                       return false;\r
                        }\r
                }\r
-               if (negative)\r
-                       return !found;\r
-               else\r
-                       return found;\r
+               return true;\r
        }\r
        \r
+       \r
        // Match notebooks in search terms against notes\r
        private boolean matchNotebook(String guid) {\r
                if (getNotebooks().size() == 0)\r
@@ -198,6 +192,28 @@ public class REnSearch {
                        return matchListAll(getNotebooks(), name);\r
        }\r
        // Match notebooks in search terms against notes\r
+       /**\r
+        * Check the stack of a note's notebook against the "stack:" search terms.\r
+        *\r
+        * @param guid GUID of the notebook holding the note being tested\r
+        * @return true when the search has no stack terms; otherwise the result of\r
+        *         matching those terms against the notebook's stack name (an empty\r
+        *         string when no notebook has this GUID or the notebook has no stack)\r
+        */\r
+       private boolean matchNotebookStack(String guid) {\r
+               if (getStack().isEmpty())\r
+                       return true;\r
+               NotebookTable bookTable = new NotebookTable(logger, conn);\r
+\r
+               // Find the stack name of the notebook with this GUID\r
+               String name = "";\r
+               for (Notebook book : bookTable.getAll()) {\r
+                       String bookGuid = book.getGuid();\r
+                       // Compare from the stored GUID so a null argument cannot throw\r
+                       if (bookGuid != null && bookGuid.equalsIgnoreCase(guid)) {\r
+                               name = book.getStack();\r
+                               break;\r
+                       }\r
+               }\r
+               if (name == null)\r
+                       name = "";\r
+\r
+               // Pick the list matcher according to the search mode flag\r
+               if (any)\r
+                       return matchListAny(getStack(), name);\r
+               return matchListAll(getStack(), name);\r
+       }\r
+\r
+       // Match a list of search terms against a single string value\r
        private boolean matchListAny(List<String> list, String title) {\r
                if (list.size() == 0)\r
                        return true;\r
@@ -228,7 +244,7 @@ public class REnSearch {
                n = conn.getNoteTable().getNote(n.getGuid(), true, true, false, false, false);\r
 \r
                // Check for search phrases\r
-               String text = StringEscapeUtils.unescapeHtml(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase();\r
+               String text = StringEscapeUtils.unescapeHtml4(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase();\r
                boolean negative = false;\r
                for (int i=0; i<searchPhrases.size(); i++) {\r
                        String phrase = searchPhrases.get(i);\r
@@ -306,9 +322,9 @@ public class REnSearch {
                int len = search.length();\r
                char nextChar = ' ';\r
                boolean quote = false;\r
-               for (int i=0; i<len; i++) {\r
+               for (int i=0, j=0; i<len; i++, j++) {\r
                        if (search.charAt(i)==nextChar && !quote) {\r
-                               b.setCharAt(i,'\0');\r
+                               b.setCharAt(j,'\0');\r
                                nextChar = ' ';\r
                        } else {\r
                                if (search.charAt(i)=='\"') {\r
@@ -316,6 +332,8 @@ public class REnSearch {
                                                quote=true;\r
                                        } else {\r
                                                quote=false;\r
+                                               j++;\r
+                                               b.insert(j, "\0");\r
                                        }\r
                                }\r
                        }\r
@@ -360,10 +378,9 @@ public class REnSearch {
        // subject date\r
 \r
        private void parseTerms(List<String> words) {\r
-               int minLen = minimumWordLength;\r
-               \r
                for (int i=0; i<words.size(); i++) {\r
                        String word = words.get(i);\r
+                       System.out.println("word = " + word);\r
                        int pos = word.indexOf(":");\r
                        if (word.startsWith("any:")) {\r
                                any = true;\r
@@ -375,8 +392,19 @@ public class REnSearch {
                                searchPhrase=true;\r
                                searchPhrases.add(word.toLowerCase());\r
                        }\r
-                       if (!searchPhrase && pos < 0 && (word.length() >= minLen || word.indexOf('*')>=0)) \r
-                               getWords().add(word);\r
+                       if (!searchPhrase && pos < 0) {\r
+                               if (word != null && word.length() > 0 && !Global.automaticWildcardSearches())\r
+                                       getWords().add(word); \r
+                               if (word != null && word.length() > 0 && Global.automaticWildcardSearches()) {\r
+                                       String wildcardWord = word;\r
+                                       if (!wildcardWord.startsWith("*"))\r
+                                               wildcardWord = "*"+wildcardWord;\r
+                                       if (!wildcardWord.endsWith("*"))\r
+                                               wildcardWord = wildcardWord+"*";\r
+                                       getWords().add(wildcardWord); \r
+                               }\r
+//                             getWords().add("*"+word+"*");           //// WILDCARD\r
+                       }\r
                        if (word.startsWith("intitle:")) \r
                                intitle.add("*"+word+"*");\r
                        if (word.startsWith("-intitle:")) \r
@@ -413,6 +441,10 @@ public class REnSearch {
                                todo.add(word);\r
                        if (word.startsWith("-todo:")) \r
                                todo.add(word);\r
+                       if (word.startsWith("stack:"))\r
+                               stack.add(word);\r
+                       if (word.startsWith("-stack:"))\r
+                               stack.add(word);\r
 \r
                        if (word.startsWith("latitude:")) \r
                                latitude.add(word);\r
@@ -470,22 +502,10 @@ public class REnSearch {
                if (todo.size() == 0 && resource.size() == 0 && searchPhrases.size() == 0)\r
                        return true;\r
                \r
-               boolean returnTodo = false;\r
-               boolean returnResource = false;\r
-               boolean returnPhrase = false;\r
-               \r
-               if (todo.size() == 0)\r
-                       returnTodo = true;\r
-               if (resource.size() == 0)\r
-                       returnResource = true;\r
-               if (searchPhrases.size() == 0)\r
-                       returnPhrase = true;\r
-               \r
-               \r
                n = conn.getNoteTable().getNote(n.getGuid(), true, true, false, false, false);\r
                \r
                // Check for search phrases\r
-               String text = StringEscapeUtils.unescapeHtml(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase();\r
+               String text = StringEscapeUtils.unescapeHtml4(n.getContent().replaceAll("\\<.*?\\>", "")).toLowerCase();\r
                boolean negative = false;\r
                for (int i=0; i<searchPhrases.size(); i++) {\r
                        String phrase = searchPhrases.get(i);\r
@@ -496,12 +516,11 @@ public class REnSearch {
                                negative = false;\r
                        phrase = phrase.substring(1);\r
                        phrase = phrase.substring(0,phrase.length()-1);\r
-                       if (text.indexOf(phrase)>=0) {\r
-                               if (!negative)\r
-                                       returnPhrase = true;\r
-                       }\r
-                       if (text.indexOf(phrase)<0 && negative)\r
-                               returnPhrase = true;\r
+                       if (text.indexOf(phrase)>=0 && negative) {\r
+                               return false;\r
+                       } \r
+                       if (text.indexOf(phrase) < 0 && !negative)\r
+                               return false;\r
                }\r
 \r
                \r
@@ -518,27 +537,30 @@ public class REnSearch {
                        if (value.startsWith("-"))\r
                                desiredState = !desiredState;\r
                        int pos = n.getContent().indexOf("<en-todo");\r
-                       if (pos == -1 && value.startsWith("-") && (value.endsWith("*") || value.endsWith(":")))\r
-                               return true;\r
+                       if (pos == -1 && !value.startsWith("-"))\r
+                               return false;\r
                        if (pos > -1 && value.startsWith("-") && (value.endsWith("*") || value.endsWith(":")))\r
                                return false;\r
-                       if (pos == -1) \r
+                       if (pos == -1 && !value.startsWith("-")\r
                                return false;\r
-                       if (value.endsWith("*"))\r
-                               returnTodo = true;\r
+                       boolean returnTodo = false;\r
                        while (pos > -1) {\r
-                               int endPos = n.getContent().indexOf("/>", pos);\r
+                               int endPos = n.getContent().indexOf(">", pos);\r
                                String segment = n.getContent().substring(pos, endPos);\r
                                boolean currentState;\r
                                if (segment.toLowerCase().indexOf("checked=\"true\"") == -1)\r
                                        currentState = false;\r
                                else\r
                                        currentState = true;\r
-                               if (desiredState == currentState)\r
+                               if (desiredState == currentState) \r
+                                       returnTodo = true;\r
+                               if (value.endsWith("*") || value.endsWith(":"))\r
                                        returnTodo = true;\r
                                \r
                                pos = n.getContent().indexOf("<en-todo", pos+1);\r
                        }\r
+                       if (!returnTodo)\r
+                               return false;\r
                }\r
                \r
                // Check resources\r
@@ -553,13 +575,14 @@ public class REnSearch {
                                return false;\r
                        for (int j=0; j<n.getResourcesSize(); j++) {\r
                                boolean match = stringMatch(n.getResources().get(j).getMime(), resourceString, negative);\r
-                               if (!match)\r
+                               if (!match && !negative)\r
+                                       return false;\r
+                               if (match && negative) \r
                                        return false;\r
-                               returnResource = true;\r
                        }\r
                }\r
                \r
-               return returnResource && returnTodo && returnPhrase;\r
+               return true;\r
        }\r
        \r
        private boolean stringMatch(String content, String text, boolean negative) {\r
@@ -661,78 +684,92 @@ public class REnSearch {
        //****************************************\r
        public List<Note> matchWords() {\r
                logger.log(logger.EXTREME, "Inside EnSearch.matchWords()");\r
-               \r
-               StringBuffer buffer = new StringBuffer(100);\r
-               Integer counter = 0;\r
                boolean subSelect = false;\r
                \r
-               buffer.append("Select guid from Note ");\r
+               NoteTable noteTable = new NoteTable(logger, conn);  \r
+               List<String> validGuids = new ArrayList<String>();\r
+               \r
                if (searchWords.size() > 0) \r
                        subSelect = true;\r
-               if (subSelect) {\r
-                       buffer.append(" where guid in ");\r
-               \r
-                       // Build the query words\r
-                       String connector;\r
-                       if (any)\r
-                               connector = new String("or");\r
-                       else\r
-                               connector = new String("and");\r
-                       for (int i=0; i<getWords().size(); i++) {\r
-                               buffer.append("(Select distinct guid from words where ");\r
-                               buffer.append("weight >= :weight"+counter.toString() +" and ");\r
-                               if (getWords().get(i).indexOf("*")==-1)\r
-                                       buffer.append("word=:word" +counter.toString());\r
-                               else\r
-                                       buffer.append("word like :word" +counter.toString());\r
-                               counter++;\r
-                               buffer.append(") ");\r
-                               if (i < getWords().size() -1)\r
-                                       buffer.append(" " +connector +" guid in ");\r
-                       }\r
-               }\r
-               \r
+\r
                NSqlQuery query = new NSqlQuery(conn.getConnection());\r
+               // Build a temp table for GUID results\r
+               if (!conn.dbTableExists("SEARCH_RESULTS")) {\r
+                       query.exec("create temporary table SEARCH_RESULTS (guid varchar)");\r
+                       query.exec("create temporary table SEARCH_RESULTS_MERGE (guid varchar)");\r
+               } else {\r
+                       query. exec("Delete from SEARCH_RESULTS");\r
+                       query. exec("Delete from SEARCH_RESULTS_MERGE");\r
+               }\r
+\r
+               NSqlQuery insertQuery = new NSqlQuery(conn.getConnection());\r
+//             NSqlQuery indexQuery = new NSqlQuery(conn.getIndexConnection());\r
+               NSqlQuery mergeQuery = new NSqlQuery(conn.getConnection());\r
+               NSqlQuery deleteQuery = new NSqlQuery(conn.getConnection());\r
+               NSqlQuery ftlQuery = new NSqlQuery(conn.getConnection());\r
+               ftlQuery.prepare("SELECT N.GUID AS GUID FROM FTL_SEARCH_DATA(:text, 0, 0) FT, NOTE N WHERE FT.TABLE='NOTE' AND N.GUID=FT.KEYS[0]");\r
                \r
-               if (!query.prepare(buffer.toString()))\r
-                       logger.log(logger.HIGH, "EnSearch Sql Prepare Failed:" +query.lastError());\r
+               insertQuery.prepare("Insert into SEARCH_RESULTS (guid) values (:guid)");\r
+               mergeQuery.prepare("Insert into SEARCH_RESULTS_MERGE (guid) values (:guid)");\r
                \r
                if (subSelect) {\r
-                       // Do the binding\r
-                       Integer binder = 0;\r
                        for (int i=0; i<getWords().size(); i++) {\r
-                               String val = getWords().get(i);\r
-                               val = val.replace('*', '%');\r
-                               query.bindValue(":weight"+binder.toString(), minimumRecognitionWeight);\r
-                               query.bindValue(":word"+binder.toString(), cleanupWord(val));\r
-                               binder++;\r
-                       }       \r
-               }\r
+//                             if (getWords().get(i).indexOf("*") == -1) {\r
+//                                     indexQuery.prepare("Select distinct guid from words where weight >= " +minimumRecognitionWeight +\r
+//                                                     " and word=:word");\r
+//                                     indexQuery.bindValue(":word", getWords().get(i));\r
+//                             } else {\r
+//                                     indexQuery.prepare("Select distinct guid from words where weight >= " +minimumRecognitionWeight +\r
+//                                             " and word like :word");\r
+//                                     indexQuery.bindValue(":word", getWords().get(i).replace("*", "%"));\r
+//                             }\r
+                               \r
+                               ftlQuery.bindValue(":text", getWords().get(i));\r
+                               ftlQuery.exec();\r
+                               \r
+                               String guid = null;\r
+                               while(ftlQuery.next()) {\r
+                                       guid = ftlQuery.valueString(0);\r
+                                       if (i==0 || any) {\r
+                                               insertQuery.bindValue(":guid", guid);\r
+                                               insertQuery.exec();\r
+                                       } else {\r
+                                               mergeQuery.bindValue(":guid", guid);\r
+                                               mergeQuery.exec();\r
+                                       }\r
+                               }\r
+                               if (i>0 && !any) {\r
+                                       deleteQuery.exec("Delete from SEARCH_RESULTS where guid not in (select guid from SEARCH_RESULTS_MERGE)");\r
+                                       deleteQuery.exec("Delete from SEARCH_RESULTS_MERGE");\r
+                               }\r
+                       }\r
 \r
-               List<Note> guids = new ArrayList<Note>();\r
-               NoteTable noteTable = new NoteTable(logger, conn);  \r
-               if (!query.exec()) \r
-                       logger.log(logger.EXTREME, "EnSearch.matchWords query failed: " +query.lastError());\r
-               List<String> validGuids = new ArrayList<String>();\r
-               while (query.next()) {\r
-                       String guid = query.valueString(0);\r
-                       validGuids.add(guid);\r
+                       query.prepare("Select distinct guid from Note where guid in (Select guid from SEARCH_RESULTS)");\r
+                       if (!query.exec()) \r
+                               logger.log(logger.LOW, "Error merging search results:" + query.lastError());\r
+               \r
+                       while (query.next()) {\r
+                               validGuids.add(query.valueString(0));\r
+                       }\r
                }\r
-\r
+               \r
                List<Note> noteIndex = noteTable.getAllNotes();\r
+               List<Note> guids = new ArrayList<Note>();\r
                for (int i=0; i<noteIndex.size(); i++) {\r
                        Note n = noteIndex.get(i);\r
                        boolean good = true;\r
                        \r
-                       if (!validGuids.contains(n.getGuid()))\r
+                       if (!validGuids.contains(n.getGuid()) && subSelect)\r
                                good = false;\r
                                                \r
                        // Start matching special stuff, like tags & notebooks\r
                        if (any) {\r
-                               if (good && !matchTagsAny(n.getTagNames()))\r
+                               if (good && !matchTagsAny(n.getTagNames(), getTags()))\r
                                        good = false;\r
                                if (good && !matchNotebook(n.getNotebookGuid()))\r
                                        good = false;\r
+                               if (good && !matchNotebookStack(n.getNotebookGuid()))\r
+                                       good = false;\r
                                if (good && !matchListAny(getIntitle(), n.getTitle()))\r
                                        good = false;\r
                                if (good && !matchListAny(getAuthor(), n.getAttributes().getAuthor()))\r
@@ -750,10 +787,12 @@ public class REnSearch {
                                if (good && n.getAttributes() != null && !matchDatesAny(getSubjectDate(), n.getAttributes().getSubjectDate()))\r
                                        good = false;\r
                        } else {\r
-                               if (good && !matchTagsAll(n.getTagNames()))\r
+                               if (good && !matchTagsAll(n.getTagNames(), getTags()))\r
                                        good = false;\r
                                if (good && !matchNotebook(n.getNotebookGuid()))\r
                                        good = false;\r
+                               if (good && !matchNotebookStack(n.getNotebookGuid()))\r
+                                       good = false;\r
                                if (good && !matchListAll(getIntitle(), n.getTitle()))\r
                                        good = false;\r
                                if (good && !matchListAll(getAuthor(), n.getAttributes().getAuthor()))\r