OSDN Git Service

uni-gramで日本語全文検索できるように変更。ノートコンテンツとタイトルが対象。日本語全文検索時に日本語がハイライトされない問題を修正。
author yuki <kimaira7@gmail.com>
Wed, 26 Jun 2013 06:41:04 +0000 (15:41 +0900)
committer yuki <kimaira7@gmail.com>
Wed, 26 Jun 2013 06:41:04 +0000 (15:41 +0900)
lib/lucene-core-3.6.2.jar [new file with mode: 0644]
src/cx/fbn/nevernote/Global.java
src/cx/fbn/nevernote/NeverNote.java
src/cx/fbn/nevernote/dialog/ConfigDialog.java
src/cx/fbn/nevernote/sql/DatabaseConnection.java
src/cx/fbn/nevernote/sql/REnSearch.java
src/cx/fbn/nevernote/xml/NoteFormatter.java
src/cx/fbn/nevernote/xml/XMLInsertHilight.java

diff --git a/lib/lucene-core-3.6.2.jar b/lib/lucene-core-3.6.2.jar
new file mode 100644 (file)
index 0000000..bdc96ab
Binary files /dev/null and b/lib/lucene-core-3.6.2.jar differ
index 739c45c..35a260f 100644 (file)
@@ -2325,10 +2325,12 @@ public class Global {
        
        // タグを排除してプレーンテキストを抽出
        public static String extractPlainText(String sourceText) {
-               String plainText = sourceText.replaceAll("<.+?>", "");
-               plainText = plainText.replaceAll("\\s{2,}", " ");
+               String plainText = sourceText.replaceAll("<.+?>", "");  // strip markup tags (everything from '<' to the next '>')
+               plainText = plainText.replaceAll("\\s{2,}", " ");               // collapse runs of two or more whitespace characters into one space
                String kaigyo = System.getProperty("line.separator");
-               plainText = plainText.replaceAll(kaigyo, "");
+               plainText = plainText.replaceAll(kaigyo, "");                   // remove the platform line separator
+               plainText = plainText.replaceAll("&lt;.+?&gt;", "");    // remove text that starts with &lt; and ends with &gt; (escaped tags)
+               plainText = plainText.replaceAll("&.+?;", "");                  // remove HTML character entities; NOTE(review): matches any '&' up to the next ';', so literal text in between is dropped too - confirm intended
                
                return plainText;
        }
index ae015b7..57a014a 100644 (file)
@@ -1452,7 +1452,7 @@ public class NeverNote extends QMainWindow{
                saveNoteColumnPositions();
                saveNoteIndexWidth();
                showColumns();
-        ConfigDialog settings = new ConfigDialog(this);
+        ConfigDialog settings = new ConfigDialog(this, conn);
         String dateFormat = Global.getDateFormat();
         String timeFormat = Global.getTimeFormat();
         
index 1a4cabd..2b285c6 100644 (file)
@@ -44,6 +44,8 @@ import com.trolltech.qt.gui.QVBoxLayout;
 import com.trolltech.qt.gui.QWidget;\r
 \r
 import cx.fbn.nevernote.Global;\r
+import cx.fbn.nevernote.sql.DatabaseConnection;\r
+import cx.fbn.nevernote.sql.driver.NSqlQuery;\r
 public class ConfigDialog extends QDialog {\r
        private final QListWidget                               contentsWidget;\r
        private final ConfigFontPage                    fontPage;\r
@@ -55,10 +57,12 @@ public class ConfigDialog extends QDialog {
        private final ConfigIndexPage                   indexPage;\r
        // ICHANGED\r
        private final ConfigRensoNoteListPage           rensoNoteListPage;\r
+       private final DatabaseConnection conn;\r
        \r
     private final String iconPath = new String("classpath:cx/fbn/nevernote/icons/");\r
        \r
-       public ConfigDialog(QWidget parent) {\r
+       public ConfigDialog(QWidget parent, DatabaseConnection conn) {\r
+               this.conn = conn;\r
                \r
                contentsWidget = new QListWidget(this);\r
                setWindowIcon(new QIcon(iconPath+"config.png"));\r
@@ -238,6 +242,29 @@ public class ConfigDialog extends QDialog {
                Global.setVerifyExclude(rensoNoteListPage.getVerifyExcludeChecked());\r
                Global.setRensoListItemMaximum(rensoNoteListPage.getRensoListItemMaximum());\r
                \r
+               // 全文検索の対象項目を再設定\r
+               NSqlQuery query = new NSqlQuery(conn.getConnection());\r
+               query.exec("CALL FTL_DROP_ALL();");     // カラム単位で削除できないので一度全部消して、再構築\r
+               \r
+               StringBuilder noteTableTarget = new StringBuilder();\r
+               if (Global.indexNoteBody()) {\r
+                       noteTableTarget.append("CONTENTTEXT");\r
+               }\r
+               if (Global.indexNoteTitle()) {\r
+                       if (noteTableTarget.length() > 0) {\r
+                               noteTableTarget.append(", ");\r
+                       }\r
+                       noteTableTarget.append("TITLE");\r
+               }\r
+               \r
+               // TODO 他の項目もあとで追加\r
+               \r
+               if (noteTableTarget.length() > 0) {\r
+                       query.prepare("CALL FTL_CREATE_INDEX('PUBLIC', 'NOTE', :column);");\r
+                       query.bindValue(":column", noteTableTarget.toString());\r
+                       query.exec();\r
+               }\r
+               \r
                close();\r
        }\r
        \r
index a15330a..deefdc8 100644 (file)
@@ -307,6 +307,16 @@ public class DatabaseConnection {
                                query2.bindValue(":guid", guid);
                                query2.exec();
                        }
+                       
+                       // 全文検索のための準備
+                       query.exec("CREATE ALIAS IF NOT EXISTS FTL_INIT FOR \"org.h2.fulltext.FullTextLucene.init\"");
+                       query.exec("CALL FTL_INIT()");
+                       if (Global.indexNoteBody()) {
+                               query.exec("CALL FTL_CREATE_INDEX('PUBLIC', 'NOTE', 'CONTENTTEXT');");
+                       }
+                       if (Global.indexNoteTitle()) {
+                               query.exec("CALL FTL_CREATE_INDEX('PUBLIC', 'NOTE', 'TITLE');");
+                       }
                }
        }
        
index 1a0ec5b..4ee21b7 100644 (file)
@@ -380,6 +380,7 @@ public class REnSearch {
        private void parseTerms(List<String> words) {\r
                for (int i=0; i<words.size(); i++) {\r
                        String word = words.get(i);\r
+                       System.out.println("word = " + word);\r
                        int pos = word.indexOf(":");\r
                        if (word.startsWith("any:")) {\r
                                any = true;\r
@@ -702,28 +703,33 @@ public class REnSearch {
                }\r
 \r
                NSqlQuery insertQuery = new NSqlQuery(conn.getConnection());\r
-               NSqlQuery indexQuery = new NSqlQuery(conn.getIndexConnection());\r
+//             NSqlQuery indexQuery = new NSqlQuery(conn.getIndexConnection());\r
                NSqlQuery mergeQuery = new NSqlQuery(conn.getConnection());\r
                NSqlQuery deleteQuery = new NSqlQuery(conn.getConnection());\r
+               NSqlQuery ftlQuery = new NSqlQuery(conn.getConnection());\r
+               ftlQuery.prepare("SELECT N.GUID AS GUID FROM FTL_SEARCH_DATA(:text, 0, 0) FT, NOTE N WHERE FT.TABLE='NOTE' AND N.GUID=FT.KEYS[0]");\r
                \r
                insertQuery.prepare("Insert into SEARCH_RESULTS (guid) values (:guid)");\r
                mergeQuery.prepare("Insert into SEARCH_RESULTS_MERGE (guid) values (:guid)");\r
                \r
                if (subSelect) {\r
                        for (int i=0; i<getWords().size(); i++) {\r
-                               if (getWords().get(i).indexOf("*") == -1) {\r
-                                       indexQuery.prepare("Select distinct guid from words where weight >= " +minimumRecognitionWeight +\r
-                                                       " and word=:word");\r
-                                       indexQuery.bindValue(":word", getWords().get(i));\r
-                               } else {\r
-                                       indexQuery.prepare("Select distinct guid from words where weight >= " +minimumRecognitionWeight +\r
-                                               " and word like :word");\r
-                                       indexQuery.bindValue(":word", getWords().get(i).replace("*", "%"));\r
-                               }\r
-                               indexQuery.exec();\r
+//                             if (getWords().get(i).indexOf("*") == -1) {\r
+//                                     indexQuery.prepare("Select distinct guid from words where weight >= " +minimumRecognitionWeight +\r
+//                                                     " and word=:word");\r
+//                                     indexQuery.bindValue(":word", getWords().get(i));\r
+//                             } else {\r
+//                                     indexQuery.prepare("Select distinct guid from words where weight >= " +minimumRecognitionWeight +\r
+//                                             " and word like :word");\r
+//                                     indexQuery.bindValue(":word", getWords().get(i).replace("*", "%"));\r
+//                             }\r
+                               \r
+                               ftlQuery.bindValue(":text", getWords().get(i));\r
+                               ftlQuery.exec();\r
+                               \r
                                String guid = null;\r
-                               while(indexQuery.next()) {\r
-                                       guid = indexQuery.valueString(0);\r
+                               while(ftlQuery.next()) {\r
+                                       guid = ftlQuery.valueString(0);\r
                                        if (i==0 || any) {\r
                                                insertQuery.bindValue(":guid", guid);\r
                                                insertQuery.exec();\r
index 17674e1..9a62abd 100644 (file)
@@ -123,8 +123,9 @@ public class NoteFormatter {
                logger.log(logger.EXTREME, "Note guid: " +currentNoteGuid);\r
                logger.log(logger.EXTREME, "Note Text:" +currentNote);\r
                QDomDocument doc = new QDomDocument();\r
-               QDomDocument.Result result = doc.setContent(currentNote.getContent());\r
-\r
+//             QDomDocument.Result result = doc.setContent(currentNote.getContent());\r
+               QDomDocument.Result result = doc.setContent(conn.getNoteTable().getNoteContentNoUTFConversion(currentNote.getGuid()));\r
+               \r
                // Handle any errors\r
                if (!result.success) {\r
                        logger.log(logger.LOW, "Error parsing document.  Attempting to restructure");\r
@@ -197,8 +198,13 @@ public class NoteFormatter {
                        for (int j=z-1; j>i+1; j--) \r
                                html.deleteCharAt(j);\r
                } \r
+               \r
+               QTextCodec codec;\r
+               codec = QTextCodec.codecForName("UTF-8");\r
+               String value = codec.fromUnicode(html.toString()).toString();\r
+               return value;\r
 \r
-               return html.toString(); //.replace("<Body", "<Body dir=\"rtl\"");\r
+//             return html.toString(); //.replace("<Body", "<Body dir=\"rtl\"");\r
        }       \r
 \r
        private void addImageHilight(String resGuid, QFile f) {\r
index 179d9ed..6b90cc6 100644 (file)
@@ -86,8 +86,9 @@ public class XMLInsertHilight {
        }\r
        \r
        // We found a text node, so we need to search for things to hilight\r
-       private void scanWords(QDomNode node) {\r
+       private void scanWords(QDomNode node) { \r
                String value = node.nodeValue();\r
+               \r
                QDomDocumentFragment fragment = doc.createDocumentFragment();\r
                boolean matchFound = false;\r
                int previousPosition = 0;\r
@@ -151,9 +152,9 @@ public class XMLInsertHilight {
                        String term = terms.get(i);\r
                        if (term.indexOf("*") > -1) {\r
                                term = term.replace("*", "");\r
-                       } else {\r
+                       }/* else {\r
                                term = "\\b"+term+"\\b";\r
-                       }\r
+                       }*/\r
                        regex.append(term);\r
                        if (i<terms.size()-1)\r
                                regex.append("|"); \r