OSDN Git Service

Correct word index bug which excluded some words in error.
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.List;\r
28 import java.util.TreeSet;\r
29 import java.util.concurrent.LinkedBlockingQueue;\r
30 \r
31 import org.apache.commons.lang.StringEscapeUtils;\r
32 import org.apache.tika.exception.TikaException;\r
33 import org.apache.tika.metadata.Metadata;\r
34 import org.apache.tika.parser.ParseContext;\r
35 import org.apache.tika.parser.microsoft.OfficeParser;\r
36 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
37 import org.apache.tika.parser.odf.OpenDocumentParser;\r
38 import org.apache.tika.parser.pdf.PDFParser;\r
39 import org.apache.tika.parser.rtf.RTFParser;\r
40 import org.apache.tika.sax.BodyContentHandler;\r
41 import org.xml.sax.ContentHandler;\r
42 import org.xml.sax.SAXException;\r
43 \r
44 import com.evernote.edam.type.Data;\r
45 import com.evernote.edam.type.Note;\r
46 import com.evernote.edam.type.Resource;\r
47 import com.trolltech.qt.core.QByteArray;\r
48 import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
49 import com.trolltech.qt.core.QObject;\r
50 import com.trolltech.qt.core.QTemporaryFile;\r
51 import com.trolltech.qt.xml.QDomDocument;\r
52 import com.trolltech.qt.xml.QDomElement;\r
53 import com.trolltech.qt.xml.QDomNodeList;\r
54 \r
55 import cx.fbn.nevernote.Global;\r
56 import cx.fbn.nevernote.signals.IndexSignal;\r
57 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
58 import cx.fbn.nevernote.signals.NoteSignal;\r
59 import cx.fbn.nevernote.sql.DatabaseConnection;\r
60 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
61 \r
62 public class IndexRunner extends QObject implements Runnable {\r
63         \r
64         private final ApplicationLogger         logger;\r
65         private String                                          guid;\r
66         private QByteArray                                      resourceBinary;\r
67         public volatile NoteSignal                      noteSignal;\r
68         public volatile NoteResourceSignal      resourceSignal;\r
69         private int                                                     indexType;\r
70         public final int                                        SCAN=1; \r
71         public final int                                        REINDEXALL=2;\r
72         public final int                                        REINDEXNOTE=3;\r
73         public boolean                                          keepRunning;\r
74         private final QDomDocument                      doc;\r
75         private static String                           regex = Global.getWordRegex();\r
76         private final DatabaseConnection        conn;\r
77         private volatile LinkedBlockingQueue<String> workQueue;\r
78         private static int MAX_QUEUED_WAITING = 1000;\r
79         public boolean interrupt;\r
80         public boolean idle;\r
81         public boolean indexAttachmentsLocally = true;\r
82         public volatile IndexSignal                     signal;\r
83         private final TreeSet<String>           foundWords;\r
84 \r
85         \r
        /**
         * Creates an index runner with its own logger, database connection and
         * bounded work queue. The runner starts in SCAN mode with no target guid
         * and with keepRunning set to true.
         *
         * @param logname name handed to the ApplicationLogger constructor
         * @param u       forwarded to DatabaseConnection (presumably the database URL -- TODO confirm)
         * @param uid     forwarded to DatabaseConnection (presumably the database user id -- TODO confirm)
         * @param pswd    forwarded to DatabaseConnection (presumably the database password -- TODO confirm)
         * @param cpswd   forwarded to DatabaseConnection (presumably a cipher password -- TODO confirm)
         */
        public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {
                // Words already sent to the index for the item currently being processed
                foundWords = new TreeSet<String>();
                logger = new ApplicationLogger(logname);
                conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);
                indexType = SCAN;
                guid = null;
                keepRunning = true;
                doc = new QDomDocument();
                // Bounded queue: at most MAX_QUEUED_WAITING requests can be held
                workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);  
        }
96         \r
        /**
         * Sets the kind of work performed on the next pass of run().
         *
         * @param t one of SCAN, REINDEXALL or REINDEXNOTE; run() itself passes 0
         *          after finishing a scan or a full re-index request
         */
        public void setIndexType(int t) {
                indexType = t;
        }
100         \r
101         \r
102         @Override\r
103         public void run() {\r
104                 thread().setPriority(Thread.MIN_PRIORITY);\r
105                 noteSignal = new NoteSignal();\r
106                 resourceSignal = new NoteResourceSignal();\r
107                 signal = new IndexSignal();\r
108                 logger.log(logger.EXTREME, "Starting index thread ");\r
109                 while (keepRunning) {\r
110                         idle=true;\r
111                         try {\r
112                                 //waitSeconds(1);\r
113                                 String work = workQueue.take();\r
114                                 idle=false;\r
115                                 if (work.startsWith("SCAN")) {\r
116                                         guid=null;\r
117                                         interrupt = false;\r
118                                         indexType = SCAN;\r
119                                 }\r
120                                 if (work.startsWith("REINDEXALL")) {\r
121                                         guid = null;\r
122                                         indexType=REINDEXALL;\r
123                                 }\r
124                                 if (work.startsWith("REINDEXNOTE")) {\r
125                                         work = work.replace("REINDEXNOTE ", "");\r
126                                         guid = work;\r
127                                         indexType = REINDEXNOTE;\r
128                                 }\r
129                                 if (work.startsWith("STOP")) {\r
130                                         keepRunning = false;\r
131                                         guid = null;\r
132                                 }\r
133                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
134                                 if (indexType == SCAN && keepRunning) {\r
135                                         logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
136                                         scanUnindexed();\r
137                                         setIndexType(0);\r
138                                 }\r
139                                 if (indexType == REINDEXALL && keepRunning) {\r
140                                         logger.log(logger.MEDIUM, "Marking all for reindex");\r
141                                         reindexAll();\r
142                                         setIndexType(0);\r
143                                 }\r
144                                 if (indexType == REINDEXNOTE && keepRunning) {\r
145                                         reindexNote();\r
146                                 }\r
147                         } catch (InterruptedException e) {\r
148                                 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());\r
149                         }\r
150                 }\r
151                 logger.log(logger.EXTREME, "Shutting down database");\r
152                 conn.dbShutdown();\r
153                 logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
154         }\r
155         \r
156         // Reindex a note\r
157         public void indexNoteContent() {\r
158                 foundWords.clear();\r
159                 \r
160                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
161                 \r
162                 logger.log(logger.EXTREME, "Getting note content");\r
163                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
164                 String data = n.getContent();\r
165                 data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
166                 \r
167                 logger.log(logger.EXTREME, "Removing any encrypted data");\r
168                 data = removeEnCrypt(data.toString());\r
169                 logger.log(logger.EXTREME, "Removing xml markups");\r
170                 String text =  removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
171                 n.getTitle());\r
172                                 \r
173                 logger.log(logger.EXTREME, "Splitting words");\r
174                 String[] result = text.toString().split(regex);\r
175                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
176                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
177                 \r
178                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
179                 for (int j=0; j<result.length && keepRunning; j++) {\r
180                         if (!result[j].trim().equals("")) {\r
181                                 logger.log(logger.EXTREME, "Result word: " +result[j].trim());\r
182                                 addToIndex(guid, result[j], "CONTENT");\r
183                         }\r
184                 }\r
185                 // If we were interrupted, we will reindex this note next time\r
186                 if (Global.keepRunning) {\r
187                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
188                         conn.getNoteTable().setIndexNeeded(guid, false);\r
189                 }\r
190                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
191         }\r
192         \r
193         \r
194         private String removeTags(String text) {\r
195                 StringBuffer buffer = new StringBuffer(text);\r
196                 boolean inTag = false;\r
197                 for (int i=buffer.length()-1; i>=0; i--) {\r
198                         if (buffer.charAt(i) == '>')\r
199                                 inTag = true;\r
200                         if (buffer.charAt(i) == '<')\r
201                                 inTag = false;\r
202                         if (inTag || buffer.charAt(i) == '<')\r
203                                 buffer.deleteCharAt(i);\r
204                 }\r
205                 \r
206                 return buffer.toString();\r
207         }\r
208 \r
209         \r
210         public synchronized boolean addWork(String request) {\r
211                 if (workQueue.size() == 0) {\r
212                         workQueue.offer(request);\r
213                         return true;\r
214                 }\r
215                 return false;\r
216         }\r
217         \r
        /**
         * Reports how many requests are currently waiting in the work queue.
         *
         * @return the current size of the work queue
         */
        public synchronized int getWorkQueueSize() {
                return workQueue.size();
        }
221         \r
222         public void indexResource() {\r
223                 \r
224                 if (guid == null)\r
225                         return;\r
226                 foundWords.clear();\r
227                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
228                 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
229                         resourceBinary = new QByteArray(" ");\r
230                 else\r
231                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
232                 \r
233                 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
234                 // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
235                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
236                         \r
237                 doc.setContent(resourceBinary);\r
238                 QDomElement docElem = doc.documentElement();\r
239                         \r
240                 // look for text tags\r
241                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
242                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
243                         QDomElement enmedia = anchors.at(i).toElement();\r
244                         String weight = new String(enmedia.attribute("w"));\r
245                         String text = new String(enmedia.text()).toLowerCase();\r
246                         if (!text.equals("")) {\r
247                                 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
248                         }\r
249                 }\r
250                 \r
251                 if (Global.keepRunning && indexAttachmentsLocally) {\r
252                         indexResourceContent(guid);\r
253                 }\r
254                                 \r
255                 if (Global.keepRunning)\r
256                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
257         }\r
258         \r
259         private void indexResourceContent(String guid) {\r
260                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
261                 if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
262                         indexResourcePDF(r);\r
263                         return;\r
264                 }\r
265                 if (r.getMime().equalsIgnoreCase("application/docx") || \r
266                         r.getMime().equalsIgnoreCase("application/xlsx") || \r
267                         r.getMime().equalsIgnoreCase("application/pptx")) {\r
268                         indexResourceOOXML(r);\r
269                         return;\r
270                 }\r
271                 if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
272                         r.getMime().equalsIgnoreCase("application/ppt") ||\r
273                         r.getMime().equalsIgnoreCase("application/xls") ||\r
274                         r.getMime().equalsIgnoreCase("application/msg") ||\r
275                         r.getMime().equalsIgnoreCase("application/doc")) {\r
276                                 indexResourceOffice(r);\r
277                                 return;\r
278                 }\r
279                 if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
280                                         indexResourceRTF(r);\r
281                                         return;\r
282                 }\r
283                 if (r.getMime().equalsIgnoreCase("application/odf") ||\r
284                         r.getMime().equalsIgnoreCase("application/odt") ||\r
285                         r.getMime().equalsIgnoreCase("application/odp") ||\r
286                         r.getMime().equalsIgnoreCase("application/odg") ||\r
287                         r.getMime().equalsIgnoreCase("application/odb") ||\r
288                         r.getMime().equalsIgnoreCase("application/ods")) {\r
289                         indexResourceODF(r);\r
290                         return;\r
291                 }\r
292         }\r
293 \r
294 \r
295         private void indexResourceRTF(Resource r) {\r
296 \r
297                 QTemporaryFile f = writeResource(r.getData());\r
298                 if (!keepRunning) {\r
299                         return;\r
300                 }\r
301                 \r
302                 InputStream input;\r
303                 try {\r
304                         input = new FileInputStream(new File(f.fileName()));\r
305                         ContentHandler textHandler = new BodyContentHandler(-1);\r
306                         Metadata metadata = new Metadata();\r
307                         RTFParser parser = new RTFParser();     \r
308                         ParseContext context = new ParseContext();\r
309                         parser.parse(input, textHandler, metadata, context);\r
310                         String[] result = textHandler.toString().split(regex);\r
311                         for (int i=0; i<result.length && keepRunning; i++) {\r
312                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
313                         }\r
314                         input.close();\r
315                 \r
316                         f.close();\r
317                 } catch (java.lang.ClassCastException e) {\r
318                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
319                 } catch (FileNotFoundException e) {\r
320                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
321                 } catch (IOException e) {\r
322                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
323                 } catch (SAXException e) {\r
324                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
325                 } catch (TikaException e) {\r
326                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
327                 } catch (Exception e) {\r
328                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
329                 } catch (java.lang.NoSuchMethodError e) {\r
330                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
331                 } catch (Error e) {\r
332                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
333                 }\r
334         }\r
335 \r
336         \r
337         private void indexResourceODF(Resource r) {\r
338 \r
339                 QTemporaryFile f = writeResource(r.getData());\r
340                 if (!keepRunning) {\r
341                         return;\r
342                 }\r
343                 \r
344                 InputStream input;\r
345                 try {\r
346                         input = new FileInputStream(new File(f.fileName()));\r
347                         ContentHandler textHandler = new BodyContentHandler(-1);\r
348                         Metadata metadata = new Metadata();\r
349                         OpenDocumentParser parser = new OpenDocumentParser();   \r
350                         ParseContext context = new ParseContext();\r
351                         parser.parse(input, textHandler, metadata, context);\r
352                         String[] result = textHandler.toString().split(regex);\r
353                         for (int i=0; i<result.length && keepRunning; i++) {\r
354                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
355                         }\r
356                         input.close();\r
357                 \r
358                         f.close();\r
359                 } catch (java.lang.ClassCastException e) {\r
360                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
361                 } catch (FileNotFoundException e) {\r
362                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
363                 } catch (IOException e) {\r
364                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
365                 } catch (SAXException e) {\r
366                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
367                 } catch (TikaException e) {\r
368                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
369                 } catch (Exception e) {\r
370                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
371                 } catch (java.lang.NoSuchMethodError e) {\r
372                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
373                 } catch (Error e) {\r
374                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
375                 }\r
376         }\r
377 \r
378         \r
379         private void indexResourceOffice(Resource r) {\r
380 \r
381                 QTemporaryFile f = writeResource(r.getData());\r
382                 if (!keepRunning) {\r
383                         return;\r
384                 }\r
385                 \r
386                 InputStream input;\r
387                 try {\r
388                         input = new FileInputStream(new File(f.fileName()));\r
389                         ContentHandler textHandler = new BodyContentHandler(-1);\r
390                         Metadata metadata = new Metadata();\r
391                         OfficeParser parser = new OfficeParser();       \r
392                         ParseContext context = new ParseContext();\r
393                         parser.parse(input, textHandler, metadata, context);\r
394                         String[] result = textHandler.toString().split(regex);\r
395                         for (int i=0; i<result.length && keepRunning; i++) {\r
396                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
397                         }\r
398                         input.close();\r
399                 \r
400                         f.close();\r
401                 } catch (java.lang.ClassCastException e) {\r
402                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
403                 } catch (FileNotFoundException e) {\r
404                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
405                 } catch (IOException e) {\r
406                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
407                 } catch (SAXException e) {\r
408                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
409                 } catch (TikaException e) {\r
410                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
411                 } catch (Exception e) {\r
412                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
413                 } catch (java.lang.NoSuchMethodError e) {\r
414                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
415                 } catch (Error e) {\r
416                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
417                 }\r
418         }\r
419 \r
420         \r
421         \r
422         private void indexResourcePDF(Resource r) {\r
423 \r
424                 QTemporaryFile f = writeResource(r.getData());\r
425                 if (!keepRunning) {\r
426                         return;\r
427                 }\r
428                 \r
429                 InputStream input;\r
430                 try {                   \r
431                         input = new FileInputStream(new File(f.fileName()));\r
432                         ContentHandler textHandler = new BodyContentHandler(-1);\r
433                         Metadata metadata = new Metadata();\r
434                         PDFParser parser = new PDFParser();     \r
435                         ParseContext context = new ParseContext();\r
436                         parser.parse(input, textHandler, metadata, context);\r
437                         String[] result = textHandler.toString().split(regex);\r
438                         for (int i=0; i<result.length && keepRunning; i++) {\r
439                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
440                         }\r
441                         input.close();\r
442                 \r
443                         f.close();\r
444                 } catch (java.lang.ClassCastException e) {\r
445                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
446                 } catch (FileNotFoundException e) {\r
447                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
448                 } catch (IOException e) {\r
449                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
450                 } catch (SAXException e) {\r
451                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
452                 } catch (TikaException e) {\r
453                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
454                 } catch (Exception e) {\r
455                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
456                 } catch (java.lang.NoSuchMethodError e) {\r
457                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
458                 } catch (Error e) {\r
459                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
460                 }\r
461         }\r
462         \r
463         \r
464         private void indexResourceOOXML(Resource r) {\r
465 \r
466                 QTemporaryFile f = writeResource(r.getData());\r
467                 if (!keepRunning) {\r
468                         return;\r
469                 }\r
470                 \r
471                 InputStream input;\r
472                 try {\r
473                         input = new FileInputStream(new File(f.fileName()));\r
474                         ContentHandler textHandler = new BodyContentHandler(-1);\r
475                         Metadata metadata = new Metadata();\r
476                         OOXMLParser parser = new OOXMLParser(); \r
477                         ParseContext context = new ParseContext();\r
478                         parser.parse(input, textHandler, metadata, context);\r
479                         String[] result = textHandler.toString().split(regex);\r
480                         for (int i=0; i<result.length && keepRunning; i++) {\r
481                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
482                         }\r
483                         input.close();\r
484                 \r
485                         f.close();\r
486                 } catch (java.lang.ClassCastException e) {\r
487                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
488                 } catch (FileNotFoundException e) {\r
489                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
490                 } catch (IOException e) {\r
491                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
492                 } catch (SAXException e) {\r
493                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
494                 } catch (TikaException e) {\r
495                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
496                 } catch (Exception e) {\r
497                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
498                 } catch (java.lang.NoSuchMethodError e) {\r
499                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
500                 } catch (Error e) {\r
501                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
502                 }\r
503         }\r
504         \r
505 \r
506         \r
507         private QTemporaryFile writeResource(Data d) {\r
508                 QTemporaryFile newFile = new QTemporaryFile();\r
509                 newFile.open(OpenModeFlag.WriteOnly);\r
510                 newFile.write(d.getBody());\r
511                 newFile.close();\r
512                 return newFile;\r
513         } \r
514 \r
515         \r
516         private String removeEnCrypt(String content) {\r
517                 int index = content.indexOf("<en-crypt");\r
518                 int endPos;\r
519                 boolean tagFound = true;\r
520                 while (tagFound && keepRunning) {\r
521                         endPos = content.indexOf("</en-crypt>", index)+11;\r
522                         if (endPos > -1 && index > -1) {\r
523                                 content = content.substring(0,index)+content.substring(endPos);\r
524                                 index = content.indexOf("<en-crypt");\r
525                         } else {\r
526                                 tagFound = false;\r
527                         }\r
528                 }\r
529                 return content;\r
530         }\r
531 \r
532         \r
533         private void addToIndex(String guid, String word, String type) {\r
534                 if (foundWords.contains(word))\r
535                         return;\r
536                 StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
537                 for (int i=buffer.length()-1; i>=0; i--) {\r
538                         if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
539                                 buffer.deleteCharAt(i);\r
540                         else\r
541                                 break;\r
542                 }\r
543                 buffer = buffer.reverse();\r
544                 for (int i=buffer.length()-1; i>=0; i--) {\r
545                         if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
546                                 buffer.deleteCharAt(i);\r
547                         else\r
548                                 break;\r
549                 }\r
550                 buffer = buffer.reverse();\r
551                 if (buffer.length() > 0) {\r
552                         // We have a good word, now let's trim off junk at the beginning or end\r
553                         if (!foundWords.contains(buffer.toString())) {\r
554                                 foundWords.add(buffer.toString());\r
555                                 foundWords.add(word);\r
556                                 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
557                         }\r
558                 }\r
559                 return;\r
560         }\r
561         \r
562         private void scanUnindexed() {\r
563                 List<String> notes = conn.getNoteTable().getUnindexed();\r
564                 guid = null;\r
565                 boolean started = false;\r
566                 if (notes.size() > 0) {\r
567                         signal.indexStarted.emit();\r
568                         started = true;\r
569                 }\r
570                 for (int i=0; i<notes.size() && !interrupt && keepRunning; i++) {\r
571                         guid = notes.get(i);\r
572                         if (guid != null && keepRunning) {\r
573                                 //waitSeconds(1);\r
574                                 indexNoteContent();\r
575                         }\r
576                 }\r
577                 \r
578                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
579                 if (unindexedResources.size() > 0 && !started) {\r
580                         signal.indexStarted.emit();\r
581                         started = true;\r
582                 }\r
583                 for (int i=0; i<unindexedResources.size()&& !interrupt && keepRunning; i++) {\r
584                         guid = unindexedResources.get(i);\r
585                         if (keepRunning) {\r
586                                 //waitSeconds(1);\r
587                                 indexResource();\r
588                         }\r
589                 }\r
590                 if (started && keepRunning && !interrupt) \r
591                         signal.indexFinished.emit();\r
592         }\r
593         \r
594         private void reindexNote() {\r
595                 if (guid == null)\r
596                         return;\r
597                 conn.getNoteTable().setIndexNeeded(guid, true);\r
598         }\r
599         \r
        /**
         * Handles a REINDEXALL request by calling reindexAllNotes() on the note
         * table and reindexAll() on the note resource table.
         * NOTE(review): presumably these only flag every note and resource as
         * needing an index pass rather than indexing them here -- confirm
         * against the table classes.
         */
        private void reindexAll() {
                conn.getNoteTable().reindexAllNotes();
                conn.getNoteTable().noteResourceTable.reindexAll(); 
        }
604 \r
605 //      private void waitSeconds(int len) {\r
606 //              QDateTime currentdate = new QDateTime(QDateTime.currentDateTime());\r
607 //              QDateTime futuredate = new QDateTime(QDateTime.currentDateTime());\r
608 //              \r
609 //              while (keepRunning && (futuredate.toTime_t() - currentdate.toTime_t() >=len) ) {\r
610 //                      Thread.yield();\r
611 //                      futuredate = new QDateTime(QDateTime.currentDateTime());\r
612 //              }\r
613 //      }\r
614 }\r