OSDN Git Service

Merge Hiroshi's language changes into development branch.
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.util.concurrent.LinkedBlockingQueue;\r
23 \r
24 import org.apache.commons.lang.StringEscapeUtils;\r
25 \r
26 import com.evernote.edam.type.Note;\r
27 import com.evernote.edam.type.Resource;\r
28 import com.trolltech.qt.core.QByteArray;\r
29 import com.trolltech.qt.core.QObject;\r
30 import com.trolltech.qt.xml.QDomDocument;\r
31 import com.trolltech.qt.xml.QDomElement;\r
32 import com.trolltech.qt.xml.QDomNodeList;\r
33 \r
34 import cx.fbn.nevernote.Global;\r
35 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
36 import cx.fbn.nevernote.signals.NoteSignal;\r
37 import cx.fbn.nevernote.sql.DatabaseConnection;\r
38 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
39 \r
40 //public class IndexRunner implements QRunnable {\r
41 public class IndexRunner extends QObject implements Runnable {\r
42         \r
43         private final ApplicationLogger         logger;\r
44         private String                                          guid;\r
45         private QByteArray                                      resourceBinary;\r
46         public volatile NoteSignal                      noteSignal;\r
47         public volatile NoteResourceSignal      resourceSignal;\r
48         private int                                                     indexType;\r
49         public final int                                        CONTENT=1; \r
50         public final int                                        RESOURCE=2;\r
51         private boolean                                         keepRunning;\r
52 //      public volatile int                                     ID;\r
53         private final QDomDocument                      doc;\r
54         private static String                           regex = Global.getWordRegex();\r
55         private final DatabaseConnection        conn;\r
56         private volatile LinkedBlockingQueue<String> workQueue;\r
57 //      private static int MAX_EMPTY_QUEUE_COUNT = 1;\r
58         private static int MAX_QUEUED_WAITING = 1000;\r
59 \r
60         \r
61 \r
62         \r
63         public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
64                 logger = new ApplicationLogger(logname);\r
65                 conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
66                 noteSignal = new NoteSignal();\r
67                 resourceSignal = new NoteResourceSignal();\r
68 //              threadSignal = new ThreadSignal();\r
69                 indexType = CONTENT;\r
70                 guid = null;\r
71                 keepRunning = true;\r
72                 doc = new QDomDocument();\r
73                 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);\r
74         }\r
75         \r
76         \r
77         public void setIndexType(int t) {\r
78                 indexType = t;\r
79         }\r
80         \r
81         \r
82         @Override\r
83         public void run() {\r
84                 thread().setPriority(Thread.MIN_PRIORITY);\r
85                 logger.log(logger.EXTREME, "Starting index thread ");\r
86                 while (keepRunning) {\r
87                         try {\r
88                                 String work = workQueue.take();\r
89                                 if (work.startsWith("CONTENT")) {\r
90                                         work = work.replace("CONTENT ", "");\r
91                                         guid = work;\r
92                                         indexType = CONTENT;\r
93                                 }\r
94                                 if (work.startsWith("RESOURCE")) {\r
95                                         work = work.replace("RESOURCE ", "");\r
96                                         guid = work;\r
97                                         indexType = RESOURCE;\r
98                                 }\r
99                                 if (work.startsWith("STOP")) {\r
100                                         keepRunning = false;\r
101                                         guid = work;\r
102                                 }\r
103                                 if (guid == null || guid.trim().equals("")) {\r
104                                         setIndexType(0);\r
105                                         resourceSignal.resourceIndexed.emit("null or empty guid");\r
106                                 }\r
107                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
108                                 if (indexType == CONTENT && keepRunning) {\r
109                                         logger.log(logger.MEDIUM, "Indexing note: "+guid);\r
110                                         indexNoteContent();\r
111                                         setIndexType(0);\r
112                                 }\r
113                                 if (indexType == RESOURCE && keepRunning) {\r
114                                         logger.log(logger.MEDIUM, "Indexing resource: "+guid);\r
115                                         indexResource();\r
116                                         setIndexType(0);\r
117                                 }\r
118                         } catch (InterruptedException e) {\r
119                                 // TODO Auto-generated catch block\r
120                                 e.printStackTrace();\r
121                         }\r
122                 }\r
123                 conn.dbShutdown();\r
124         }\r
125         \r
126         // Reindex a note\r
127         public void indexNoteContent() {\r
128                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
129                 \r
130                 logger.log(logger.EXTREME, "Getting note content");\r
131                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
132                 String data = n.getContent();\r
133                 \r
134                 logger.log(logger.EXTREME, "Removing any encrypted data");\r
135                 data = removeEnCrypt(data);\r
136                 logger.log(logger.EXTREME, "Removing xml markups");\r
137                 String text = StringEscapeUtils.unescapeHtml(data.replaceAll("\\<.*?\\>", ""));\r
138 \r
139                 \r
140                 logger.log(logger.EXTREME, "Splitting words");\r
141                 String[] result = text.toString().split(regex);\r
142                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
143                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
144                 \r
145                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
146                 for (int j=0; j<result.length && keepRunning; j++) {\r
147                         logger.log(logger.EXTREME, "Result word: " +result[j]);\r
148                         if (result[j].length() > 0) {\r
149                                 if (Character.isLetterOrDigit(result[j].charAt(0))) {\r
150                                         int len = result[j].length();\r
151                                         StringBuffer buffer = new StringBuffer(result[j].toLowerCase());\r
152                                         logger.log(logger.EXTREME, "Processing " +buffer);\r
153                                         for (int k=len-1; k>=0 && keepRunning; k--) {\r
154                                                 if (!Character.isLetterOrDigit(result[j].charAt(k)))\r
155                                                         buffer.deleteCharAt(k);\r
156                                                 else\r
157                                                         k=-1;\r
158                                         }\r
159 \r
160                                         if (buffer.length()>=Global.minimumWordCount) {\r
161                                                 logger.log(logger.EXTREME, "Adding " +buffer);\r
162                                                 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), "CONTENT", 100);\r
163                                         }\r
164                                 }\r
165                         }\r
166                 }\r
167                 // If we were interrupted, we will reindex this note next time\r
168                 if (Global.keepRunning) {\r
169                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
170                         conn.getNoteTable().setIndexNeeded(guid, false);\r
171                 }\r
172                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
173         }\r
174 \r
175         \r
176         public synchronized boolean addWork(String request) {\r
177                 if (workQueue.size() == 0) {\r
178                         workQueue.offer(request);\r
179                         return true;\r
180                 }\r
181                 return false;\r
182         }\r
183         \r
184         public synchronized int getWorkQueueSize() {\r
185                 return workQueue.size();\r
186         }\r
187         \r
188         public void indexResource() {\r
189                 \r
190                 if (guid == null)\r
191                         return;\r
192                 \r
193                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
194                 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
195                         resourceBinary = new QByteArray(" ");\r
196                 else\r
197                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
198                 \r
199                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");\r
200                         \r
201                 doc.setContent(resourceBinary);\r
202                 QDomElement docElem = doc.documentElement();\r
203                         \r
204                 // look for text tags\r
205                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
206                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
207                         QDomElement enmedia = anchors.at(i).toElement();\r
208                         String weight = new String(enmedia.attribute("w"));\r
209                         String text = new String(enmedia.text()).toLowerCase();\r
210                         if (!text.equals("")) {\r
211                                 conn.getWordsTable().addWordToNoteIndex(guid, text, "RESOURCE", new Integer(weight));\r
212                         }\r
213                 }\r
214                 if (Global.keepRunning)\r
215                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
216         }\r
217 \r
218         \r
219         private String removeEnCrypt(String content) {\r
220                 int index = content.indexOf("<en-crypt");\r
221                 int endPos;\r
222                 boolean tagFound = true;\r
223                 while (tagFound && keepRunning) {\r
224                         endPos = content.indexOf("</en-crypt>", index)+11;\r
225                         if (endPos > -1 && index > -1) {\r
226                                 content = content.substring(0,index)+content.substring(endPos);\r
227                                 index = content.indexOf("<en-crypt");\r
228                         } else {\r
229                                 tagFound = false;\r
230                         }\r
231                 }\r
232                 return content;\r
233         }\r
234 \r
235         \r
236         \r
237         \r
238 \r
239 }\r