OSDN Git Service

Upgrade apache & evernote libraries
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NixNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.List;\r
28 import java.util.TreeSet;\r
29 import java.util.concurrent.LinkedBlockingQueue;\r
30 import java.util.concurrent.locks.LockSupport;\r
31 \r
32 import org.apache.commons.lang3.StringEscapeUtils;\r
33 import org.apache.tika.exception.TikaException;\r
34 import org.apache.tika.metadata.Metadata;\r
35 import org.apache.tika.parser.ParseContext;\r
36 import org.apache.tika.parser.microsoft.OfficeParser;\r
37 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
38 import org.apache.tika.parser.odf.OpenDocumentParser;\r
39 import org.apache.tika.parser.pdf.PDFParser;\r
40 import org.apache.tika.parser.rtf.RTFParser;\r
41 import org.apache.tika.sax.BodyContentHandler;\r
42 import org.xml.sax.ContentHandler;\r
43 import org.xml.sax.SAXException;\r
44 \r
45 import com.evernote.edam.type.Data;\r
46 import com.evernote.edam.type.Note;\r
47 import com.evernote.edam.type.Resource;\r
48 import com.trolltech.qt.core.QByteArray;\r
49 import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
50 import com.trolltech.qt.core.QObject;\r
51 import com.trolltech.qt.core.QTemporaryFile;\r
52 import com.trolltech.qt.xml.QDomDocument;\r
53 import com.trolltech.qt.xml.QDomElement;\r
54 import com.trolltech.qt.xml.QDomNodeList;\r
55 \r
56 import cx.fbn.nevernote.Global;\r
57 import cx.fbn.nevernote.signals.IndexSignal;\r
58 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
59 import cx.fbn.nevernote.signals.NoteSignal;\r
60 import cx.fbn.nevernote.sql.DatabaseConnection;\r
61 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
62 \r
63 public class IndexRunner extends QObject implements Runnable {\r
64         \r
65         private final ApplicationLogger         logger;\r
66         private String                                          guid;\r
67         private QByteArray                                      resourceBinary;\r
68         public volatile NoteSignal                      noteSignal;\r
69         public volatile NoteResourceSignal      resourceSignal;\r
70         private int                                                     indexType;\r
71         public final int                                        SCAN=1; \r
72         public final int                                        REINDEXALL=2;\r
73         public final int                                        REINDEXNOTE=3;\r
74         public boolean                                          keepRunning;\r
75         private final QDomDocument                      doc;\r
76         private static String                           regex = Global.getWordRegex();\r
77         public String                                           specialIndexCharacters = "";\r
78         public boolean                                          indexNoteBody = true;\r
79         public boolean                                          indexNoteTitle = true;\r
80         public boolean                                          indexImageRecognition = true;\r
81         private final DatabaseConnection        conn;\r
82         private volatile LinkedBlockingQueue<String> workQueue;\r
83         private static int MAX_QUEUED_WAITING = 1000;\r
84         public boolean interrupt;\r
85         public boolean idle;\r
86         public boolean indexAttachmentsLocally = true;\r
87         public volatile IndexSignal                     signal;\r
88         private final TreeSet<String>           foundWords;\r
89         int uncommittedCount = 0;\r
90 \r
91         \r
/**
 * Builds a runner with its own logger, database connection and work queue.
 *
 * @param logname name handed to the ApplicationLogger
 * @param u       passed through to DatabaseConnection
 * @param i       passed through to DatabaseConnection
 * @param r       passed through to DatabaseConnection
 * @param uid     passed through to DatabaseConnection
 * @param pswd    passed through to DatabaseConnection
 * @param cpswd   passed through to DatabaseConnection
 */
public IndexRunner(String logname, String u, String i, String r, String uid, String pswd, String cpswd) {
    // Collaborators first: the connection needs the logger.
    this.logger = new ApplicationLogger(logname);
    this.conn = new DatabaseConnection(this.logger, u, i, r, uid, pswd, cpswd, 500);
    this.doc = new QDomDocument();

    // Plain containers.
    this.foundWords = new TreeSet<String>();
    this.workQueue = new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);

    // Initial state: nothing selected, default to a scan, ready to run.
    this.guid = null;
    this.indexType = SCAN;
    this.keepRunning = true;
}
102         \r
103         public void setIndexType(int t) {\r
104                 indexType = t;\r
105         }\r
106         \r
107         \r
108         @Override\r
109         public void run() {\r
110                 thread().setPriority(Thread.MIN_PRIORITY);\r
111                 noteSignal = new NoteSignal();\r
112                 resourceSignal = new NoteResourceSignal();\r
113                 signal = new IndexSignal();\r
114                 logger.log(logger.EXTREME, "Starting index thread ");\r
115                 while (keepRunning) {\r
116                         idle=true;\r
117                         try {\r
118                                 conn.commitTransaction();\r
119                                 uncommittedCount = 0;\r
120                                 String work = workQueue.take();\r
121                                 idle=false;\r
122                                 if (work.startsWith("SCAN")) {\r
123                                         guid=null;\r
124                                         interrupt = false;\r
125                                         indexType = SCAN;\r
126                                 }\r
127                                 if (work.startsWith("REINDEXALL")) {\r
128                                         guid = null;\r
129                                         indexType=REINDEXALL;\r
130                                 }\r
131                                 if (work.startsWith("REINDEXNOTE")) {\r
132                                         work = work.replace("REINDEXNOTE ", "");\r
133                                         guid = work;\r
134                                         indexType = REINDEXNOTE;\r
135                                 }\r
136                                 if (work.startsWith("STOP")) {\r
137                                         keepRunning = false;\r
138                                         guid = null;\r
139                                 }\r
140                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
141                                 if (indexType == SCAN && keepRunning) {\r
142                                         logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
143                                         scanUnindexed();\r
144                                         setIndexType(0);\r
145                                 }\r
146                                 if (indexType == REINDEXALL && keepRunning) {\r
147                                         logger.log(logger.MEDIUM, "Marking all for reindex");\r
148                                         reindexAll();\r
149                                         setIndexType(0);\r
150                                 }\r
151                                 if (indexType == REINDEXNOTE && keepRunning) {\r
152                                         reindexNote();\r
153                                 }\r
154                         } catch (InterruptedException e) {\r
155                                 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());\r
156                         }\r
157                 }\r
158                 logger.log(logger.EXTREME, "Shutting down database");\r
159                 conn.dbShutdown();\r
160                 logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
161         }\r
162         \r
163         // Reindex a note\r
164         public void indexNoteContent() {\r
165                 foundWords.clear();\r
166                 \r
167                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
168                 \r
169                 logger.log(logger.EXTREME, "Getting note content");\r
170                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
171                 String data;\r
172                 if (indexNoteBody) {\r
173                         data = n.getContent();\r
174                         data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
175                 \r
176                         logger.log(logger.EXTREME, "Removing any encrypted data");\r
177                         data = removeEnCrypt(data.toString());\r
178                         logger.log(logger.EXTREME, "Removing xml markups");\r
179                 } else\r
180                         data = "";\r
181                 String text;\r
182                 if (indexNoteTitle)\r
183                         text =  removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());\r
184                 else\r
185                         text = removeTags(StringEscapeUtils.unescapeHtml4(data));\r
186                                 \r
187                 logger.log(logger.EXTREME, "Splitting words");\r
188                 String[] result = text.toString().split(regex);\r
189                 conn.commitTransaction();\r
190                 conn.beginTransaction();\r
191                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
192                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
193                 \r
194                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
195                 for (int j=0; j<result.length && keepRunning; j++) {\r
196                         if (interrupt) {\r
197                                 processInterrupt();\r
198                         }\r
199                         if (!result[j].trim().equals("")) {\r
200                                 logger.log(logger.EXTREME, "Result word: " +result[j].trim());\r
201                                 addToIndex(guid, result[j], "CONTENT");\r
202                         }\r
203                 }\r
204                 // If we were interrupted, we will reindex this note next time\r
205                 if (Global.keepRunning) {\r
206                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
207                         conn.getNoteTable().setIndexNeeded(guid, false);\r
208                 } \r
209                 conn.commitTransaction();\r
210                 uncommittedCount = 0;\r
211                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
212         }\r
213         \r
214         \r
215         private String removeTags(String text) {\r
216                 StringBuffer buffer = new StringBuffer(text);\r
217                 boolean inTag = false;\r
218                 for (int i=buffer.length()-1; i>=0; i--) {\r
219                         if (buffer.charAt(i) == '>')\r
220                                 inTag = true;\r
221                         if (buffer.charAt(i) == '<')\r
222                                 inTag = false;\r
223                         if (inTag || buffer.charAt(i) == '<')\r
224                                 buffer.deleteCharAt(i);\r
225                 }\r
226                 \r
227                 return buffer.toString();\r
228         }\r
229 \r
230         \r
231         public synchronized boolean addWork(String request) {\r
232                 if (workQueue.size() == 0) {\r
233                         workQueue.offer(request);\r
234                         return true;\r
235                 }\r
236                 return false;\r
237         }\r
238         \r
239         public synchronized int getWorkQueueSize() {\r
240                 return workQueue.size();\r
241         }\r
242         \r
243         public void indexResource() {\r
244                 \r
245                 if (guid == null)\r
246                         return;\r
247                 foundWords.clear();\r
248                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
249                 if (!indexImageRecognition || \r
250                                 r == null || r.getRecognition() == null || \r
251                                 r.getRecognition().getBody() == null || \r
252                                 r.getRecognition().getBody().length == 0) \r
253                         resourceBinary = new QByteArray(" ");\r
254                 else\r
255                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
256                 \r
257                 conn.commitTransaction();\r
258                 conn.beginTransaction();\r
259                 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
260                 // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
261                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
262                 conn.commitTransaction();\r
263                 uncommittedCount = 0;\r
264                 conn.beginTransaction();\r
265                         \r
266                 doc.setContent(resourceBinary);\r
267                 QDomElement docElem = doc.documentElement();\r
268                         \r
269                 // look for text tags\r
270                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
271                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
272                         if (interrupt) {\r
273                                 if (interrupt) {\r
274                                         processInterrupt();\r
275                                 }\r
276                         }\r
277                         QDomElement enmedia = anchors.at(i).toElement();\r
278                         String weight = new String(enmedia.attribute("w"));\r
279                         String text = new String(enmedia.text()).toLowerCase();\r
280                         if (!text.equals("")) {\r
281                                 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
282                                 uncommittedCount++;\r
283                                 if (uncommittedCount > 100) {\r
284                                         conn.commitTransaction();\r
285                                         uncommittedCount=0;\r
286                                 }\r
287                         }\r
288                 }\r
289                 \r
290                 if (Global.keepRunning && indexAttachmentsLocally) {\r
291                         conn.commitTransaction();\r
292                         uncommittedCount = 0;\r
293                         conn.beginTransaction();\r
294                         indexResourceContent(guid);\r
295                 }\r
296                                 \r
297                 if (Global.keepRunning)\r
298                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
299                 conn.commitTransaction();\r
300                 uncommittedCount = 0;\r
301         }\r
302         \r
303         private void indexResourceContent(String guid) {\r
304                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
305                 if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
306                         indexResourcePDF(r);\r
307                         return;\r
308                 }\r
309                 if (r.getMime().equalsIgnoreCase("application/docx") || \r
310                         r.getMime().equalsIgnoreCase("application/xlsx") || \r
311                         r.getMime().equalsIgnoreCase("application/pptx")) {\r
312                         indexResourceOOXML(r);\r
313                         return;\r
314                 }\r
315                 if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
316                         r.getMime().equalsIgnoreCase("application/ppt") ||\r
317                         r.getMime().equalsIgnoreCase("application/xls") ||\r
318                         r.getMime().equalsIgnoreCase("application/msg") ||\r
319                         r.getMime().equalsIgnoreCase("application/doc")) {\r
320                                 indexResourceOffice(r);\r
321                                 return;\r
322                 }\r
323                 if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
324                                         indexResourceRTF(r);\r
325                                         return;\r
326                 }\r
327                 if (r.getMime().equalsIgnoreCase("application/odf") ||\r
328                         r.getMime().equalsIgnoreCase("application/odt") ||\r
329                         r.getMime().equalsIgnoreCase("application/odp") ||\r
330                         r.getMime().equalsIgnoreCase("application/odg") ||\r
331                         r.getMime().equalsIgnoreCase("application/odb") ||\r
332                         r.getMime().equalsIgnoreCase("application/ods")) {\r
333                         indexResourceODF(r);\r
334                         return;\r
335                 }\r
336         }\r
337 \r
338 \r
339         private void indexResourceRTF(Resource r) {\r
340 \r
341                 Data d = r.getData();\r
342                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
343                         d = r.getData();\r
344                 if (d.getSize()== 0)\r
345                         return;\r
346 \r
347                 QTemporaryFile f = writeResource(d);\r
348                 if (!keepRunning) {\r
349                         return;\r
350                 }\r
351                 \r
352                 InputStream input;\r
353                 try {\r
354                         input = new FileInputStream(new File(f.fileName()));\r
355                         ContentHandler textHandler = new BodyContentHandler(-1);\r
356                         Metadata metadata = new Metadata();\r
357                         RTFParser parser = new RTFParser();     \r
358                         ParseContext context = new ParseContext();\r
359                         parser.parse(input, textHandler, metadata, context);\r
360                         String[] result = textHandler.toString().split(regex);\r
361                         for (int i=0; i<result.length && keepRunning; i++) {\r
362                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
363                         }\r
364                         input.close();\r
365                 \r
366                         f.close();\r
367                 } catch (java.lang.ClassCastException e) {\r
368                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
369                 } catch (FileNotFoundException e) {\r
370                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
371                 } catch (IOException e) {\r
372                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
373                 } catch (SAXException e) {\r
374                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
375                 } catch (TikaException e) {\r
376                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
377                 } catch (Exception e) {\r
378                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
379                 } catch (java.lang.NoSuchMethodError e) {\r
380                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
381                 } catch (Error e) {\r
382                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
383                 }\r
384         }\r
385 \r
386         \r
387         private void indexResourceODF(Resource r) {\r
388 \r
389                 Data d = r.getData();\r
390                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
391                         d = r.getData();\r
392                 if (d.getSize()== 0)\r
393                         return;\r
394                 QTemporaryFile f = writeResource(d);\r
395                 if (!keepRunning) {\r
396                         return;\r
397                 }\r
398                 \r
399                 InputStream input;\r
400                 try {\r
401                         input = new FileInputStream(new File(f.fileName()));\r
402                         ContentHandler textHandler = new BodyContentHandler(-1);\r
403                         Metadata metadata = new Metadata();\r
404                         OpenDocumentParser parser = new OpenDocumentParser();   \r
405                         ParseContext context = new ParseContext();\r
406                         parser.parse(input, textHandler, metadata, context);\r
407                         String[] result = textHandler.toString().split(regex);\r
408                         for (int i=0; i<result.length && keepRunning; i++) {\r
409                                 if (interrupt) {\r
410                                         processInterrupt();\r
411                                 }\r
412                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
413                         }\r
414                         input.close();\r
415                 \r
416                         f.close();\r
417                 } catch (java.lang.ClassCastException e) {\r
418                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
419                 } catch (FileNotFoundException e) {\r
420                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
421                 } catch (IOException e) {\r
422                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
423                 } catch (SAXException e) {\r
424                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
425                 } catch (TikaException e) {\r
426                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
427                 } catch (Exception e) {\r
428                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
429                 } catch (java.lang.NoSuchMethodError e) {\r
430                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
431                 } catch (Error e) {\r
432                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
433                 }\r
434         }\r
435 \r
436         \r
437         private void indexResourceOffice(Resource r) {\r
438 \r
439                 Data d = r.getData();\r
440                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
441                         d = r.getData();\r
442                 if (d.getSize()== 0)\r
443                         return;\r
444                 QTemporaryFile f = writeResource(d);\r
445                 if (!keepRunning) {\r
446                         return;\r
447                 }\r
448                 \r
449                 InputStream input;\r
450                 try {\r
451                         input = new FileInputStream(new File(f.fileName()));\r
452                         ContentHandler textHandler = new BodyContentHandler(-1);\r
453                         Metadata metadata = new Metadata();\r
454                         OfficeParser parser = new OfficeParser();       \r
455                         ParseContext context = new ParseContext();\r
456                         parser.parse(input, textHandler, metadata, context);\r
457                         String[] result = textHandler.toString().split(regex);\r
458                         for (int i=0; i<result.length && keepRunning; i++) {\r
459                                 if (interrupt) {\r
460                                         processInterrupt();\r
461                                 }\r
462                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
463                         }\r
464                         input.close();\r
465                 \r
466                         f.close();\r
467                 } catch (java.lang.ClassCastException e) {\r
468                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
469                 } catch (FileNotFoundException e) {\r
470                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
471                 } catch (IOException e) {\r
472                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
473                 } catch (SAXException e) {\r
474                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
475                 } catch (TikaException e) {\r
476                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
477                 } catch (Exception e) {\r
478                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
479                 } catch (java.lang.NoSuchMethodError e) {\r
480                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
481                 } catch (Error e) {\r
482                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
483                 }\r
484         }\r
485 \r
486         \r
487         \r
488         private void indexResourcePDF(Resource r) {\r
489 \r
490                 Data d = r.getData();\r
491                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
492                         d = r.getData();\r
493                 if (d.getSize()== 0)\r
494                         return;\r
495                 QTemporaryFile f = writeResource(d);\r
496                 if (!keepRunning) {\r
497                         return;\r
498                 }\r
499                 \r
500                 InputStream input;\r
501                 try {                   \r
502                         input = new FileInputStream(new File(f.fileName()));\r
503                         ContentHandler textHandler = new BodyContentHandler(-1);\r
504                         Metadata metadata = new Metadata();\r
505                         PDFParser parser = new PDFParser();     \r
506                         ParseContext context = new ParseContext();\r
507                         parser.parse(input, textHandler, metadata, context);\r
508                         String[] result = textHandler.toString().split(regex);\r
509                         for (int i=0; i<result.length && keepRunning; i++) {\r
510                                 if (interrupt) {\r
511                                         processInterrupt();\r
512                                 }\r
513                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
514                         }\r
515                         input.close();\r
516                 \r
517                         f.close();\r
518                 } catch (java.lang.ClassCastException e) {\r
519                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
520                 } catch (FileNotFoundException e) {\r
521                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
522                 } catch (IOException e) {\r
523                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
524                 } catch (SAXException e) {\r
525                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
526                 } catch (TikaException e) {\r
527                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
528                 } catch (Exception e) {\r
529                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
530                 } catch (java.lang.NoSuchMethodError e) {\r
531                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
532                 } catch (Error e) {\r
533                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
534                 }\r
535         }\r
536         \r
537         \r
538         private void indexResourceOOXML(Resource r) {\r
539 \r
540                 Data d = r.getData();\r
541                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
542                         d = r.getData();\r
543                 if (d.getSize()== 0)\r
544                         return;\r
545                 QTemporaryFile f = writeResource(d);\r
546                 if (!keepRunning) {\r
547                         return;\r
548                 }\r
549                 \r
550                 InputStream input;\r
551                 try {\r
552                         input = new FileInputStream(new File(f.fileName()));\r
553                         ContentHandler textHandler = new BodyContentHandler(-1);\r
554                         Metadata metadata = new Metadata();\r
555                         OOXMLParser parser = new OOXMLParser(); \r
556                         ParseContext context = new ParseContext();\r
557                         parser.parse(input, textHandler, metadata, context);\r
558                         String[] result = textHandler.toString().split(regex);\r
559                         for (int i=0; i<result.length && keepRunning; i++) {\r
560                                 if (interrupt) {\r
561                                         processInterrupt();\r
562                                 }\r
563                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
564                         }\r
565                         input.close();\r
566                 \r
567                         f.close();\r
568                 } catch (java.lang.ClassCastException e) {\r
569                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
570                 } catch (FileNotFoundException e) {\r
571                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
572                 } catch (IOException e) {\r
573                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
574                 } catch (SAXException e) {\r
575                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
576                 } catch (TikaException e) {\r
577                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
578                 } catch (Exception e) {\r
579                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
580                 } catch (java.lang.NoSuchMethodError e) {\r
581                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
582                 } catch (Error e) {\r
583                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());              }\r
584         }\r
585         \r
586 \r
587         \r
588         private QTemporaryFile writeResource(Data d) {\r
589                 QTemporaryFile newFile = new QTemporaryFile();\r
590                 newFile.open(OpenModeFlag.WriteOnly);\r
591                 newFile.write(d.getBody());\r
592                 newFile.close();\r
593                 return newFile;\r
594         } \r
595 \r
596         \r
597         private String removeEnCrypt(String content) {\r
598                 int index = content.indexOf("<en-crypt");\r
599                 int endPos;\r
600                 boolean tagFound = true;\r
601                 while (tagFound && keepRunning) {\r
602                         if (interrupt) {\r
603                                 processInterrupt();\r
604                         }\r
605                         endPos = content.indexOf("</en-crypt>", index)+11;\r
606                         if (endPos > -1 && index > -1) {\r
607                                 content = content.substring(0,index)+content.substring(endPos);\r
608                                 index = content.indexOf("<en-crypt");\r
609                         } else {\r
610                                 tagFound = false;\r
611                         }\r
612                 }\r
613                 return content;\r
614         }\r
615 \r
616         \r
617         private void addToIndex(String guid, String word, String type) {\r
618                 if (foundWords.contains(word))\r
619                         return;\r
620                 StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
621                 for (int i=buffer.length()-1; i>=0; i--) {\r
622                         if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)\r
623                                 buffer.deleteCharAt(i);\r
624                         else\r
625                                 break;\r
626                 }\r
627                 buffer = buffer.reverse();\r
628                 for (int i=buffer.length()-1; i>=0; i--) {\r
629                         if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
630                                 buffer.deleteCharAt(i);\r
631                         else\r
632                                 break;\r
633                 }\r
634                 buffer = buffer.reverse();\r
635                 if (buffer.length() > 0) {\r
636                         // We have a good word, now let's trim off junk at the beginning or end\r
637                         if (!foundWords.contains(buffer.toString())) {\r
638                                 foundWords.add(buffer.toString());\r
639                                 foundWords.add(word);\r
640                                 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
641                                 uncommittedCount++;\r
642                                 if (uncommittedCount > 100) {\r
643                                         conn.commitTransaction();\r
644                                         uncommittedCount=0;\r
645                                 }\r
646                         }\r
647                 }\r
648                 return;\r
649         }\r
650         \r
651         private void scanUnindexed() {\r
652                 List<String> notes = conn.getNoteTable().getUnindexed();\r
653                 guid = null;\r
654                 boolean started = false;\r
655                 if (notes.size() > 0) {\r
656                         signal.indexStarted.emit();\r
657                         started = true;\r
658                 }\r
659                 for (int i=0; i<notes.size() && keepRunning; i++) {\r
660                         if (interrupt) {\r
661                                 processInterrupt();\r
662                         }\r
663                         guid = notes.get(i);\r
664                         if (guid != null && keepRunning) {\r
665                                 indexNoteContent();\r
666                         }\r
667                 }\r
668                 \r
669                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
670                 if (unindexedResources.size() > 0 && !started) {\r
671                         signal.indexStarted.emit();\r
672                         started = true;\r
673                 }\r
674                 for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {\r
675                         if (interrupt) {\r
676                                 processInterrupt();\r
677                         }\r
678                         guid = unindexedResources.get(i);\r
679                         if (keepRunning) {\r
680                                 indexResource();\r
681                         }\r
682                 }\r
683                 \r
684                 // Cleanup stuff that was deleted at some point\r
685                 List<String> guids = conn.getWordsTable().getGuidList();\r
686                 logger.log(logger.LOW, "GUIDS in index: " +guids.size());\r
687                 for (int i=0; i<guids.size() && keepRunning; i++) {\r
688                         if (!conn.getNoteTable().exists(guids.get(i))) {\r
689                                 logger.log(logger.LOW, "Old GUID found: " +guids.get(i));\r
690                                 conn.getWordsTable().expunge(guids.get(i));\r
691                         }\r
692                 }\r
693                 \r
694                 if (started && keepRunning) \r
695                         signal.indexFinished.emit();\r
696         }\r
697         \r
698         private void reindexNote() {\r
699                 if (guid == null)\r
700                         return;\r
701                 conn.getNoteTable().setIndexNeeded(guid, true);\r
702         }\r
703         \r
704         private void reindexAll() {\r
705                 conn.getNoteTable().reindexAllNotes();\r
706                 conn.getNoteTable().noteResourceTable.reindexAll(); \r
707         }\r
708 \r
709         private void waitSeconds(int len) {\r
710                 long starttime = 0; // variable declared\r
711                 //...\r
712                 // for the first time, remember the timestamp\r
713             starttime = System.currentTimeMillis();\r
714                 // the next timestamp we want to wake up\r
715                 starttime += (1000.0);\r
716                 // Wait until the desired next time arrives using nanosecond\r
717                 // accuracy timer (wait(time) isn't accurate enough on most platforms) \r
718                 LockSupport.parkNanos((Math.max(0, \r
719                     starttime - System.currentTimeMillis()) * 1000000));\r
720         }\r
721         \r
722         private void processInterrupt() {\r
723                 conn.commitTransaction();\r
724                 waitSeconds(1);\r
725                 uncommittedCount = 0;\r
726                 conn.beginTransaction();\r
727                 interrupt = false;\r
728         }\r
729         \r
730 }\r