OSDN Git Service

Add logic to deal with zero length resources.
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.List;\r
28 import java.util.TreeSet;\r
29 import java.util.concurrent.LinkedBlockingQueue;\r
30 import java.util.concurrent.locks.LockSupport;\r
31 \r
32 import org.apache.commons.lang.StringEscapeUtils;\r
33 import org.apache.tika.exception.TikaException;\r
34 import org.apache.tika.metadata.Metadata;\r
35 import org.apache.tika.parser.ParseContext;\r
36 import org.apache.tika.parser.microsoft.OfficeParser;\r
37 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
38 import org.apache.tika.parser.odf.OpenDocumentParser;\r
39 import org.apache.tika.parser.pdf.PDFParser;\r
40 import org.apache.tika.parser.rtf.RTFParser;\r
41 import org.apache.tika.sax.BodyContentHandler;\r
42 import org.xml.sax.ContentHandler;\r
43 import org.xml.sax.SAXException;\r
44 \r
45 import com.evernote.edam.type.Data;\r
46 import com.evernote.edam.type.Note;\r
47 import com.evernote.edam.type.Resource;\r
48 import com.trolltech.qt.core.QByteArray;\r
49 import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
50 import com.trolltech.qt.core.QObject;\r
51 import com.trolltech.qt.core.QTemporaryFile;\r
52 import com.trolltech.qt.xml.QDomDocument;\r
53 import com.trolltech.qt.xml.QDomElement;\r
54 import com.trolltech.qt.xml.QDomNodeList;\r
55 \r
56 import cx.fbn.nevernote.Global;\r
57 import cx.fbn.nevernote.signals.IndexSignal;\r
58 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
59 import cx.fbn.nevernote.signals.NoteSignal;\r
60 import cx.fbn.nevernote.sql.DatabaseConnection;\r
61 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
62 \r
63 public class IndexRunner extends QObject implements Runnable {\r
64         \r
65         private final ApplicationLogger         logger;\r
66         private String                                          guid;\r
67         private QByteArray                                      resourceBinary;\r
68         public volatile NoteSignal                      noteSignal;\r
69         public volatile NoteResourceSignal      resourceSignal;\r
70         private int                                                     indexType;\r
71         public final int                                        SCAN=1; \r
72         public final int                                        REINDEXALL=2;\r
73         public final int                                        REINDEXNOTE=3;\r
74         public boolean                                          keepRunning;\r
75         private final QDomDocument                      doc;\r
76         private static String                           regex = Global.getWordRegex();\r
77         private final DatabaseConnection        conn;\r
78         private volatile LinkedBlockingQueue<String> workQueue;\r
79         private static int MAX_QUEUED_WAITING = 1000;\r
80         public boolean interrupt;\r
81         public boolean idle;\r
82         public boolean indexAttachmentsLocally = true;\r
83         public volatile IndexSignal                     signal;\r
84         private final TreeSet<String>           foundWords;\r
85         int uncommittedCount = 0;\r
86 \r
87         \r
88         public IndexRunner(String logname, String u, String i, String r, String uid, String pswd, String cpswd) {\r
89                 foundWords = new TreeSet<String>();\r
90                 logger = new ApplicationLogger(logname);\r
91                 conn = new DatabaseConnection(logger, u, i, r, uid, pswd, cpswd, 500);\r
92                 indexType = SCAN;\r
93                 guid = null;\r
94                 keepRunning = true;\r
95                 doc = new QDomDocument();\r
96                 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);  \r
97         }\r
98         \r
99         public void setIndexType(int t) {\r
100                 indexType = t;\r
101         }\r
102         \r
103         \r
104         @Override\r
105         public void run() {\r
106                 thread().setPriority(Thread.MIN_PRIORITY);\r
107                 noteSignal = new NoteSignal();\r
108                 resourceSignal = new NoteResourceSignal();\r
109                 signal = new IndexSignal();\r
110                 logger.log(logger.EXTREME, "Starting index thread ");\r
111                 while (keepRunning) {\r
112                         idle=true;\r
113                         try {\r
114                                 conn.commitTransaction();\r
115                                 uncommittedCount = 0;\r
116                                 String work = workQueue.take();\r
117                                 idle=false;\r
118                                 if (work.startsWith("SCAN")) {\r
119                                         guid=null;\r
120                                         interrupt = false;\r
121                                         indexType = SCAN;\r
122                                 }\r
123                                 if (work.startsWith("REINDEXALL")) {\r
124                                         guid = null;\r
125                                         indexType=REINDEXALL;\r
126                                 }\r
127                                 if (work.startsWith("REINDEXNOTE")) {\r
128                                         work = work.replace("REINDEXNOTE ", "");\r
129                                         guid = work;\r
130                                         indexType = REINDEXNOTE;\r
131                                 }\r
132                                 if (work.startsWith("STOP")) {\r
133                                         keepRunning = false;\r
134                                         guid = null;\r
135                                 }\r
136                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
137                                 if (indexType == SCAN && keepRunning) {\r
138                                         logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
139                                         scanUnindexed();\r
140                                         setIndexType(0);\r
141                                 }\r
142                                 if (indexType == REINDEXALL && keepRunning) {\r
143                                         logger.log(logger.MEDIUM, "Marking all for reindex");\r
144                                         reindexAll();\r
145                                         setIndexType(0);\r
146                                 }\r
147                                 if (indexType == REINDEXNOTE && keepRunning) {\r
148                                         reindexNote();\r
149                                 }\r
150                         } catch (InterruptedException e) {\r
151                                 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());\r
152                         }\r
153                 }\r
154                 logger.log(logger.EXTREME, "Shutting down database");\r
155                 conn.dbShutdown();\r
156                 logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
157         }\r
158         \r
159         // Reindex a note\r
160         public void indexNoteContent() {\r
161                 foundWords.clear();\r
162                 \r
163                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
164                 \r
165                 logger.log(logger.EXTREME, "Getting note content");\r
166                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
167                 String data = n.getContent();\r
168                 data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
169                 \r
170                 logger.log(logger.EXTREME, "Removing any encrypted data");\r
171                 data = removeEnCrypt(data.toString());\r
172                 logger.log(logger.EXTREME, "Removing xml markups");\r
173                 String text =  removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
174                 n.getTitle());\r
175                                 \r
176                 logger.log(logger.EXTREME, "Splitting words");\r
177                 String[] result = text.toString().split(regex);\r
178                 conn.commitTransaction();\r
179                 conn.beginTransaction();\r
180                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
181                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
182                 \r
183                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
184                 for (int j=0; j<result.length && keepRunning; j++) {\r
185                         if (interrupt) {\r
186                                 processInterrupt();\r
187                         }\r
188                         if (!result[j].trim().equals("")) {\r
189                                 logger.log(logger.EXTREME, "Result word: " +result[j].trim());\r
190                                 addToIndex(guid, result[j], "CONTENT");\r
191                         }\r
192                 }\r
193                 // If we were interrupted, we will reindex this note next time\r
194                 if (Global.keepRunning) {\r
195                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
196                         conn.getNoteTable().setIndexNeeded(guid, false);\r
197                 } \r
198                 conn.commitTransaction();\r
199                 uncommittedCount = 0;\r
200                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
201         }\r
202         \r
203         \r
204         private String removeTags(String text) {\r
205                 StringBuffer buffer = new StringBuffer(text);\r
206                 boolean inTag = false;\r
207                 for (int i=buffer.length()-1; i>=0; i--) {\r
208                         if (buffer.charAt(i) == '>')\r
209                                 inTag = true;\r
210                         if (buffer.charAt(i) == '<')\r
211                                 inTag = false;\r
212                         if (inTag || buffer.charAt(i) == '<')\r
213                                 buffer.deleteCharAt(i);\r
214                 }\r
215                 \r
216                 return buffer.toString();\r
217         }\r
218 \r
219         \r
220         public synchronized boolean addWork(String request) {\r
221                 if (workQueue.size() == 0) {\r
222                         workQueue.offer(request);\r
223                         return true;\r
224                 }\r
225                 return false;\r
226         }\r
227         \r
228         public synchronized int getWorkQueueSize() {\r
229                 return workQueue.size();\r
230         }\r
231         \r
232         public void indexResource() {\r
233                 \r
234                 if (guid == null)\r
235                         return;\r
236                 foundWords.clear();\r
237                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
238                 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
239                         resourceBinary = new QByteArray(" ");\r
240                 else\r
241                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
242                 \r
243                 conn.commitTransaction();\r
244                 conn.beginTransaction();\r
245                 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
246                 // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
247                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
248                 conn.commitTransaction();\r
249                 uncommittedCount = 0;\r
250                 conn.beginTransaction();\r
251                         \r
252                 doc.setContent(resourceBinary);\r
253                 QDomElement docElem = doc.documentElement();\r
254                         \r
255                 // look for text tags\r
256                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
257                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
258                         if (interrupt) {\r
259                                 if (interrupt) {\r
260                                         processInterrupt();\r
261                                 }\r
262                         }\r
263                         QDomElement enmedia = anchors.at(i).toElement();\r
264                         String weight = new String(enmedia.attribute("w"));\r
265                         String text = new String(enmedia.text()).toLowerCase();\r
266                         if (!text.equals("")) {\r
267                                 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
268                                 uncommittedCount++;\r
269                                 if (uncommittedCount > 100) {\r
270                                         conn.commitTransaction();\r
271                                         uncommittedCount=0;\r
272                                 }\r
273                         }\r
274                 }\r
275                 \r
276                 if (Global.keepRunning && indexAttachmentsLocally) {\r
277                         conn.commitTransaction();\r
278                         uncommittedCount = 0;\r
279                         conn.beginTransaction();\r
280                         indexResourceContent(guid);\r
281                 }\r
282                                 \r
283                 if (Global.keepRunning)\r
284                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
285                 conn.commitTransaction();\r
286                 uncommittedCount = 0;\r
287         }\r
288         \r
289         private void indexResourceContent(String guid) {\r
290                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
291                 if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
292                         indexResourcePDF(r);\r
293                         return;\r
294                 }\r
295                 if (r.getMime().equalsIgnoreCase("application/docx") || \r
296                         r.getMime().equalsIgnoreCase("application/xlsx") || \r
297                         r.getMime().equalsIgnoreCase("application/pptx")) {\r
298                         indexResourceOOXML(r);\r
299                         return;\r
300                 }\r
301                 if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
302                         r.getMime().equalsIgnoreCase("application/ppt") ||\r
303                         r.getMime().equalsIgnoreCase("application/xls") ||\r
304                         r.getMime().equalsIgnoreCase("application/msg") ||\r
305                         r.getMime().equalsIgnoreCase("application/doc")) {\r
306                                 indexResourceOffice(r);\r
307                                 return;\r
308                 }\r
309                 if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
310                                         indexResourceRTF(r);\r
311                                         return;\r
312                 }\r
313                 if (r.getMime().equalsIgnoreCase("application/odf") ||\r
314                         r.getMime().equalsIgnoreCase("application/odt") ||\r
315                         r.getMime().equalsIgnoreCase("application/odp") ||\r
316                         r.getMime().equalsIgnoreCase("application/odg") ||\r
317                         r.getMime().equalsIgnoreCase("application/odb") ||\r
318                         r.getMime().equalsIgnoreCase("application/ods")) {\r
319                         indexResourceODF(r);\r
320                         return;\r
321                 }\r
322         }\r
323 \r
324 \r
325         private void indexResourceRTF(Resource r) {\r
326 \r
327                 Data d = r.getData();\r
328                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
329                         d = r.getData();\r
330                 if (d.getSize()== 0)\r
331                         return;\r
332 \r
333                 QTemporaryFile f = writeResource(d);\r
334                 if (!keepRunning) {\r
335                         return;\r
336                 }\r
337                 \r
338                 InputStream input;\r
339                 try {\r
340                         input = new FileInputStream(new File(f.fileName()));\r
341                         ContentHandler textHandler = new BodyContentHandler(-1);\r
342                         Metadata metadata = new Metadata();\r
343                         RTFParser parser = new RTFParser();     \r
344                         ParseContext context = new ParseContext();\r
345                         parser.parse(input, textHandler, metadata, context);\r
346                         String[] result = textHandler.toString().split(regex);\r
347                         for (int i=0; i<result.length && keepRunning; i++) {\r
348                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
349                         }\r
350                         input.close();\r
351                 \r
352                         f.close();\r
353                 } catch (java.lang.ClassCastException e) {\r
354                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
355                 } catch (FileNotFoundException e) {\r
356                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
357                 } catch (IOException e) {\r
358                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
359                 } catch (SAXException e) {\r
360                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
361                 } catch (TikaException e) {\r
362                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
363                 } catch (Exception e) {\r
364                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
365                 } catch (java.lang.NoSuchMethodError e) {\r
366                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
367                 } catch (Error e) {\r
368                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
369                 }\r
370         }\r
371 \r
372         \r
373         private void indexResourceODF(Resource r) {\r
374 \r
375                 Data d = r.getData();\r
376                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
377                         d = r.getData();\r
378                 if (d.getSize()== 0)\r
379                         return;\r
380                 QTemporaryFile f = writeResource(d);\r
381                 if (!keepRunning) {\r
382                         return;\r
383                 }\r
384                 \r
385                 InputStream input;\r
386                 try {\r
387                         input = new FileInputStream(new File(f.fileName()));\r
388                         ContentHandler textHandler = new BodyContentHandler(-1);\r
389                         Metadata metadata = new Metadata();\r
390                         OpenDocumentParser parser = new OpenDocumentParser();   \r
391                         ParseContext context = new ParseContext();\r
392                         parser.parse(input, textHandler, metadata, context);\r
393                         String[] result = textHandler.toString().split(regex);\r
394                         for (int i=0; i<result.length && keepRunning; i++) {\r
395                                 if (interrupt) {\r
396                                         processInterrupt();\r
397                                 }\r
398                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
399                         }\r
400                         input.close();\r
401                 \r
402                         f.close();\r
403                 } catch (java.lang.ClassCastException e) {\r
404                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
405                 } catch (FileNotFoundException e) {\r
406                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
407                 } catch (IOException e) {\r
408                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
409                 } catch (SAXException e) {\r
410                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
411                 } catch (TikaException e) {\r
412                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
413                 } catch (Exception e) {\r
414                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
415                 } catch (java.lang.NoSuchMethodError e) {\r
416                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
417                 } catch (Error e) {\r
418                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
419                 }\r
420         }\r
421 \r
422         \r
423         private void indexResourceOffice(Resource r) {\r
424 \r
425                 Data d = r.getData();\r
426                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
427                         d = r.getData();\r
428                 if (d.getSize()== 0)\r
429                         return;\r
430                 QTemporaryFile f = writeResource(d);\r
431                 if (!keepRunning) {\r
432                         return;\r
433                 }\r
434                 \r
435                 InputStream input;\r
436                 try {\r
437                         input = new FileInputStream(new File(f.fileName()));\r
438                         ContentHandler textHandler = new BodyContentHandler(-1);\r
439                         Metadata metadata = new Metadata();\r
440                         OfficeParser parser = new OfficeParser();       \r
441                         ParseContext context = new ParseContext();\r
442                         parser.parse(input, textHandler, metadata, context);\r
443                         String[] result = textHandler.toString().split(regex);\r
444                         for (int i=0; i<result.length && keepRunning; i++) {\r
445                                 if (interrupt) {\r
446                                         processInterrupt();\r
447                                 }\r
448                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
449                         }\r
450                         input.close();\r
451                 \r
452                         f.close();\r
453                 } catch (java.lang.ClassCastException e) {\r
454                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
455                 } catch (FileNotFoundException e) {\r
456                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
457                 } catch (IOException e) {\r
458                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
459                 } catch (SAXException e) {\r
460                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
461                 } catch (TikaException e) {\r
462                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
463                 } catch (Exception e) {\r
464                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
465                 } catch (java.lang.NoSuchMethodError e) {\r
466                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
467                 } catch (Error e) {\r
468                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
469                 }\r
470         }\r
471 \r
472         \r
473         \r
474         private void indexResourcePDF(Resource r) {\r
475 \r
476                 Data d = r.getData();\r
477                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
478                         d = r.getData();\r
479                 if (d.getSize()== 0)\r
480                         return;\r
481                 QTemporaryFile f = writeResource(d);\r
482                 if (!keepRunning) {\r
483                         return;\r
484                 }\r
485                 \r
486                 InputStream input;\r
487                 try {                   \r
488                         input = new FileInputStream(new File(f.fileName()));\r
489                         ContentHandler textHandler = new BodyContentHandler(-1);\r
490                         Metadata metadata = new Metadata();\r
491                         PDFParser parser = new PDFParser();     \r
492                         ParseContext context = new ParseContext();\r
493                         parser.parse(input, textHandler, metadata, context);\r
494                         String[] result = textHandler.toString().split(regex);\r
495                         for (int i=0; i<result.length && keepRunning; i++) {\r
496                                 if (interrupt) {\r
497                                         processInterrupt();\r
498                                 }\r
499                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
500                         }\r
501                         input.close();\r
502                 \r
503                         f.close();\r
504                 } catch (java.lang.ClassCastException e) {\r
505                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
506                 } catch (FileNotFoundException e) {\r
507                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
508                 } catch (IOException e) {\r
509                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
510                 } catch (SAXException e) {\r
511                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
512                 } catch (TikaException e) {\r
513                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
514                 } catch (Exception e) {\r
515                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
516                 } catch (java.lang.NoSuchMethodError e) {\r
517                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
518                 } catch (Error e) {\r
519                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
520                 }\r
521         }\r
522         \r
523         \r
524         private void indexResourceOOXML(Resource r) {\r
525 \r
526                 Data d = r.getData();\r
527                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
528                         d = r.getData();\r
529                 if (d.getSize()== 0)\r
530                         return;\r
531                 QTemporaryFile f = writeResource(d);\r
532                 if (!keepRunning) {\r
533                         return;\r
534                 }\r
535                 \r
536                 InputStream input;\r
537                 try {\r
538                         input = new FileInputStream(new File(f.fileName()));\r
539                         ContentHandler textHandler = new BodyContentHandler(-1);\r
540                         Metadata metadata = new Metadata();\r
541                         OOXMLParser parser = new OOXMLParser(); \r
542                         ParseContext context = new ParseContext();\r
543                         parser.parse(input, textHandler, metadata, context);\r
544                         String[] result = textHandler.toString().split(regex);\r
545                         for (int i=0; i<result.length && keepRunning; i++) {\r
546                                 if (interrupt) {\r
547                                         processInterrupt();\r
548                                 }\r
549                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
550                         }\r
551                         input.close();\r
552                 \r
553                         f.close();\r
554                 } catch (java.lang.ClassCastException e) {\r
555                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
556                 } catch (FileNotFoundException e) {\r
557                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
558                 } catch (IOException e) {\r
559                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
560                 } catch (SAXException e) {\r
561                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
562                 } catch (TikaException e) {\r
563                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
564                 } catch (Exception e) {\r
565                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
566                 } catch (java.lang.NoSuchMethodError e) {\r
567                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
568                 } catch (Error e) {\r
569                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());              }\r
570         }\r
571         \r
572 \r
573         \r
574         private QTemporaryFile writeResource(Data d) {\r
575                 QTemporaryFile newFile = new QTemporaryFile();\r
576                 newFile.open(OpenModeFlag.WriteOnly);\r
577                 newFile.write(d.getBody());\r
578                 newFile.close();\r
579                 return newFile;\r
580         } \r
581 \r
582         \r
583         private String removeEnCrypt(String content) {\r
584                 int index = content.indexOf("<en-crypt");\r
585                 int endPos;\r
586                 boolean tagFound = true;\r
587                 while (tagFound && keepRunning) {\r
588                         if (interrupt) {\r
589                                 processInterrupt();\r
590                         }\r
591                         endPos = content.indexOf("</en-crypt>", index)+11;\r
592                         if (endPos > -1 && index > -1) {\r
593                                 content = content.substring(0,index)+content.substring(endPos);\r
594                                 index = content.indexOf("<en-crypt");\r
595                         } else {\r
596                                 tagFound = false;\r
597                         }\r
598                 }\r
599                 return content;\r
600         }\r
601 \r
602         \r
603         private void addToIndex(String guid, String word, String type) {\r
604                 if (foundWords.contains(word))\r
605                         return;\r
606                 StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
607                 for (int i=buffer.length()-1; i>=0; i--) {\r
608                         if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
609                                 buffer.deleteCharAt(i);\r
610                         else\r
611                                 break;\r
612                 }\r
613                 buffer = buffer.reverse();\r
614                 for (int i=buffer.length()-1; i>=0; i--) {\r
615                         if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
616                                 buffer.deleteCharAt(i);\r
617                         else\r
618                                 break;\r
619                 }\r
620                 buffer = buffer.reverse();\r
621                 if (buffer.length() > 0) {\r
622                         // We have a good word, now let's trim off junk at the beginning or end\r
623                         if (!foundWords.contains(buffer.toString())) {\r
624                                 foundWords.add(buffer.toString());\r
625                                 foundWords.add(word);\r
626                                 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
627                                 uncommittedCount++;\r
628                                 if (uncommittedCount > 100) {\r
629                                         conn.commitTransaction();\r
630                                         uncommittedCount=0;\r
631                                 }\r
632                         }\r
633                 }\r
634                 return;\r
635         }\r
636         \r
637         private void scanUnindexed() {\r
638                 List<String> notes = conn.getNoteTable().getUnindexed();\r
639                 guid = null;\r
640                 boolean started = false;\r
641                 if (notes.size() > 0) {\r
642                         signal.indexStarted.emit();\r
643                         started = true;\r
644                 }\r
645                 for (int i=0; i<notes.size() && keepRunning; i++) {\r
646                         if (interrupt) {\r
647                                 processInterrupt();\r
648                         }\r
649                         guid = notes.get(i);\r
650                         if (guid != null && keepRunning) {\r
651                                 indexNoteContent();\r
652                         }\r
653                 }\r
654                 \r
655                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
656                 if (unindexedResources.size() > 0 && !started) {\r
657                         signal.indexStarted.emit();\r
658                         started = true;\r
659                 }\r
660                 for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {\r
661                         if (interrupt) {\r
662                                 processInterrupt();\r
663                         }\r
664                         guid = unindexedResources.get(i);\r
665                         if (keepRunning) {\r
666                                 indexResource();\r
667                         }\r
668                 }\r
669                 if (started && keepRunning) \r
670                         signal.indexFinished.emit();\r
671         }\r
672         \r
673         private void reindexNote() {\r
674                 if (guid == null)\r
675                         return;\r
676                 conn.getNoteTable().setIndexNeeded(guid, true);\r
677         }\r
678         \r
679         private void reindexAll() {\r
680                 conn.getNoteTable().reindexAllNotes();\r
681                 conn.getNoteTable().noteResourceTable.reindexAll(); \r
682         }\r
683 \r
684         private void waitSeconds(int len) {\r
685                 long starttime = 0; // variable declared\r
686                 //...\r
687                 // for the first time, remember the timestamp\r
688             starttime = System.currentTimeMillis();\r
689                 // the next timestamp we want to wake up\r
690                 starttime += (1000.0);\r
691                 // Wait until the desired next time arrives using nanosecond\r
692                 // accuracy timer (wait(time) isn't accurate enough on most platforms) \r
693                 LockSupport.parkNanos((Math.max(0, \r
694                     starttime - System.currentTimeMillis()) * 1000000));\r
695         }\r
696         \r
697         private void processInterrupt() {\r
698                 conn.commitTransaction();\r
699                 waitSeconds(1);\r
700                 uncommittedCount = 0;\r
701                 conn.beginTransaction();\r
702                 interrupt = false;\r
703         }\r
704         \r
705 }\r