OSDN Git Service

Added check for null resource during indexing.
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NixNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.List;\r
28 import java.util.TreeSet;\r
29 import java.util.concurrent.LinkedBlockingQueue;\r
30 import java.util.concurrent.locks.LockSupport;\r
31 \r
32 import org.apache.commons.lang3.StringEscapeUtils;\r
33 import org.apache.tika.exception.TikaException;\r
34 import org.apache.tika.metadata.Metadata;\r
35 import org.apache.tika.parser.ParseContext;\r
36 import org.apache.tika.parser.microsoft.OfficeParser;\r
37 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
38 import org.apache.tika.parser.odf.OpenDocumentParser;\r
39 import org.apache.tika.parser.pdf.PDFParser;\r
40 import org.apache.tika.parser.rtf.RTFParser;\r
41 import org.apache.tika.sax.BodyContentHandler;\r
42 import org.xml.sax.ContentHandler;\r
43 import org.xml.sax.SAXException;\r
44 \r
45 import com.evernote.edam.type.Data;\r
46 import com.evernote.edam.type.Note;\r
47 import com.evernote.edam.type.Resource;\r
48 import com.trolltech.qt.core.QByteArray;\r
49 import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
50 import com.trolltech.qt.core.QObject;\r
51 import com.trolltech.qt.core.QTemporaryFile;\r
52 import com.trolltech.qt.xml.QDomDocument;\r
53 import com.trolltech.qt.xml.QDomElement;\r
54 import com.trolltech.qt.xml.QDomNodeList;\r
55 \r
56 import cx.fbn.nevernote.Global;\r
57 import cx.fbn.nevernote.signals.IndexSignal;\r
58 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
59 import cx.fbn.nevernote.signals.NoteSignal;\r
60 import cx.fbn.nevernote.sql.DatabaseConnection;\r
61 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
62 \r
63 public class IndexRunner extends QObject implements Runnable {\r
64         \r
65         private final ApplicationLogger         logger;\r
66         private String                                          guid;\r
67         private QByteArray                                      resourceBinary;\r
68         public volatile NoteSignal                      noteSignal;\r
69         public volatile NoteResourceSignal      resourceSignal;\r
70         private int                                                     indexType;\r
71         public final int                                        SCAN=1; \r
72         public final int                                        REINDEXALL=2;\r
73         public final int                                        REINDEXNOTE=3;\r
74         public boolean                                          keepRunning;\r
75         private final QDomDocument                      doc;\r
76         private static String                           regex = Global.getWordRegex();\r
77         public String                                           specialIndexCharacters = "";\r
78         public boolean                                          indexNoteBody = true;\r
79         public boolean                                          indexNoteTitle = true;\r
80         public boolean                                          indexImageRecognition = true;\r
81         private final DatabaseConnection        conn;\r
82         private volatile LinkedBlockingQueue<String> workQueue;\r
83         private static int MAX_QUEUED_WAITING = 1000;\r
84         public boolean interrupt;\r
85         public boolean idle;\r
86         public boolean indexAttachmentsLocally = true;\r
87         public volatile IndexSignal                     signal;\r
88         private final TreeSet<String>           foundWords;\r
89         int uncommittedCount = 0;\r
90 \r
91         \r
/**
 * Creates an index runner with its own logger, database connection and work queue.
 *
 * The runner starts out in SCAN mode with no target guid and with
 * {@code keepRunning} set, so the loop in {@link #run()} is entered.
 *
 * @param logname name handed to the {@code ApplicationLogger}
 * @param u       forwarded unchanged to {@code DatabaseConnection}
 * @param i       forwarded unchanged to {@code DatabaseConnection}
 * @param r       forwarded unchanged to {@code DatabaseConnection}
 * @param uid     forwarded unchanged to {@code DatabaseConnection} (presumably the user id -- confirm)
 * @param pswd    forwarded unchanged to {@code DatabaseConnection} (presumably the password -- confirm)
 * @param cpswd   forwarded unchanged to {@code DatabaseConnection} (presumably a cipher password -- confirm)
 */
public IndexRunner(String logname, String u, String i, String r, String uid, String pswd, String cpswd) {
    // The logger must exist before the connection, which receives it.
    this.logger = new ApplicationLogger(logname);
    // The trailing 500 is passed through as-is; its meaning is defined by DatabaseConnection.
    this.conn = new DatabaseConnection(this.logger, u, i, r, uid, pswd, cpswd, 500);

    // Working state used while indexing.
    this.doc = new QDomDocument();
    this.foundWords = new TreeSet<String>();
    this.workQueue = new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);

    // Initial run state.
    this.indexType = SCAN;
    this.guid = null;
    this.keepRunning = true;
}
102         \r
103         public void setIndexType(int t) {\r
104                 indexType = t;\r
105         }\r
106         \r
107         \r
108         @Override\r
109         public void run() {\r
110                 thread().setPriority(Thread.MIN_PRIORITY);\r
111                 noteSignal = new NoteSignal();\r
112                 resourceSignal = new NoteResourceSignal();\r
113                 signal = new IndexSignal();\r
114                 logger.log(logger.EXTREME, "Starting index thread ");\r
115                 while (keepRunning) {\r
116                         idle=true;\r
117                         try {\r
118                                 conn.commitTransaction();\r
119                                 uncommittedCount = 0;\r
120                                 String work = workQueue.take();\r
121                                 idle=false;\r
122                                 if (work.startsWith("SCAN")) {\r
123                                         guid=null;\r
124                                         interrupt = false;\r
125                                         indexType = SCAN;\r
126                                 }\r
127                                 if (work.startsWith("REINDEXALL")) {\r
128                                         guid = null;\r
129                                         indexType=REINDEXALL;\r
130                                 }\r
131                                 if (work.startsWith("REINDEXNOTE")) {\r
132                                         work = work.replace("REINDEXNOTE ", "");\r
133                                         guid = work;\r
134                                         indexType = REINDEXNOTE;\r
135                                 }\r
136                                 if (work.startsWith("STOP")) {\r
137                                         keepRunning = false;\r
138                                         guid = null;\r
139                                 }\r
140                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
141                                 if (indexType == SCAN && keepRunning) {\r
142                                         logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
143                                         scanUnindexed();\r
144                                         setIndexType(0);\r
145                                 }\r
146                                 if (indexType == REINDEXALL && keepRunning) {\r
147                                         logger.log(logger.MEDIUM, "Marking all for reindex");\r
148                                         reindexAll();\r
149                                         setIndexType(0);\r
150                                 }\r
151                                 if (indexType == REINDEXNOTE && keepRunning) {\r
152                                         reindexNote();\r
153                                 }\r
154                         } catch (InterruptedException e) {\r
155                                 logger.log(logger.LOW, "Thread interrupted exception: " +e.getMessage());\r
156                         }\r
157                 }\r
158                 logger.log(logger.EXTREME, "Shutting down database");\r
159                 conn.dbShutdown();\r
160                 logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
161         }\r
162         \r
163         // Reindex a note\r
164         public void indexNoteContent() {\r
165                 foundWords.clear();\r
166                 \r
167                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
168                 \r
169                 logger.log(logger.EXTREME, "Getting note content");\r
170                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
171                 String data;\r
172                 if (indexNoteBody) {\r
173                         data = n.getContent();\r
174                         data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
175                 \r
176                         logger.log(logger.EXTREME, "Removing any encrypted data");\r
177                         data = removeEnCrypt(data.toString());\r
178                         logger.log(logger.EXTREME, "Removing xml markups");\r
179                 } else\r
180                         data = "";\r
181                 String text;\r
182                 if (indexNoteTitle)\r
183                         text =  removeTags(StringEscapeUtils.unescapeHtml4(data) +" "+ n.getTitle());\r
184                 else\r
185                         text = removeTags(StringEscapeUtils.unescapeHtml4(data));\r
186                                 \r
187                 logger.log(logger.EXTREME, "Splitting words");\r
188                 String[] result = text.toString().split(regex);\r
189                 conn.commitTransaction();\r
190                 conn.beginTransaction();\r
191                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
192                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
193                 \r
194                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
195                 for (int j=0; j<result.length && keepRunning; j++) {\r
196                         if (interrupt) {\r
197                                 processInterrupt();\r
198                         }\r
199                         if (!result[j].trim().equals("")) {\r
200                                 logger.log(logger.EXTREME, "Result word: " +result[j].trim());\r
201                                 addToIndex(guid, result[j], "CONTENT");\r
202                         }\r
203                 }\r
204                 // If we were interrupted, we will reindex this note next time\r
205                 if (Global.keepRunning) {\r
206                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
207                         conn.getNoteTable().setIndexNeeded(guid, false);\r
208                 } \r
209                 conn.commitTransaction();\r
210                 uncommittedCount = 0;\r
211                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
212         }\r
213         \r
214         \r
215         private String removeTags(String text) {\r
216                 StringBuffer buffer = new StringBuffer(text);\r
217                 boolean inTag = false;\r
218                 for (int i=buffer.length()-1; i>=0; i--) {\r
219                         if (buffer.charAt(i) == '>')\r
220                                 inTag = true;\r
221                         if (buffer.charAt(i) == '<')\r
222                                 inTag = false;\r
223                         if (inTag || buffer.charAt(i) == '<')\r
224                                 buffer.deleteCharAt(i);\r
225                 }\r
226                 \r
227                 return buffer.toString();\r
228         }\r
229 \r
230         \r
231         public synchronized boolean addWork(String request) {\r
232                 if (workQueue.size() == 0) {\r
233                         workQueue.offer(request);\r
234                         return true;\r
235                 }\r
236                 return false;\r
237         }\r
238         \r
239         public synchronized int getWorkQueueSize() {\r
240                 return workQueue.size();\r
241         }\r
242         \r
243         public void indexResource() {\r
244                 \r
245                 if (guid == null)\r
246                         return;\r
247                 foundWords.clear();\r
248                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
249                 if (!indexImageRecognition || \r
250                                 r == null || r.getRecognition() == null || \r
251                                 r.getRecognition().getBody() == null || \r
252                                 r.getRecognition().getBody().length == 0) \r
253                         resourceBinary = new QByteArray(" ");\r
254                 else\r
255                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
256                 \r
257                 conn.commitTransaction();\r
258                 conn.beginTransaction();\r
259                 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
260                 // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
261                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
262                 conn.commitTransaction();\r
263                 uncommittedCount = 0;\r
264                 conn.beginTransaction();\r
265                         \r
266                 doc.setContent(resourceBinary);\r
267                 QDomElement docElem = doc.documentElement();\r
268                         \r
269                 // look for text tags\r
270                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
271                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
272                         if (interrupt) {\r
273                                 if (interrupt) {\r
274                                         processInterrupt();\r
275                                 }\r
276                         }\r
277                         QDomElement enmedia = anchors.at(i).toElement();\r
278                         String weight = new String(enmedia.attribute("w"));\r
279                         String text = new String(enmedia.text()).toLowerCase();\r
280                         if (!text.equals("")) {\r
281                                 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
282                                 uncommittedCount++;\r
283                                 if (uncommittedCount > 100) {\r
284                                         conn.commitTransaction();\r
285                                         uncommittedCount=0;\r
286                                 }\r
287                         }\r
288                 }\r
289                 \r
290                 if (Global.keepRunning && indexAttachmentsLocally) {\r
291                         conn.commitTransaction();\r
292                         uncommittedCount = 0;\r
293                         conn.beginTransaction();\r
294                         indexResourceContent(guid);\r
295                 }\r
296                                 \r
297                 if (Global.keepRunning)\r
298                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
299                 conn.commitTransaction();\r
300                 uncommittedCount = 0;\r
301         }\r
302         \r
303         private void indexResourceContent(String guid) {\r
304                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
305                 if (r != null && r.getMime() != null) {\r
306                         if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
307                                 indexResourcePDF(r);\r
308                                 return;\r
309                         }\r
310                         if (r.getMime().equalsIgnoreCase("application/docx") || \r
311                                 r.getMime().equalsIgnoreCase("application/xlsx") || \r
312                                 r.getMime().equalsIgnoreCase("application/pptx")) {\r
313                                 indexResourceOOXML(r);\r
314                                 return;\r
315                         }\r
316                         if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
317                                         r.getMime().equalsIgnoreCase("application/ppt") ||\r
318                                         r.getMime().equalsIgnoreCase("application/xls") ||\r
319                                         r.getMime().equalsIgnoreCase("application/msg") ||\r
320                                         r.getMime().equalsIgnoreCase("application/doc")) {\r
321                                 indexResourceOffice(r);\r
322                                 return;\r
323                         }\r
324                         if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
325                                         indexResourceRTF(r);\r
326                                         return;\r
327                         }\r
328                         if (r.getMime().equalsIgnoreCase("application/odf") ||\r
329                                 r.getMime().equalsIgnoreCase("application/odt") ||\r
330                                 r.getMime().equalsIgnoreCase("application/odp") ||\r
331                                 r.getMime().equalsIgnoreCase("application/odg") ||\r
332                                 r.getMime().equalsIgnoreCase("application/odb") ||\r
333                                 r.getMime().equalsIgnoreCase("application/ods")) {\r
334                                 indexResourceODF(r);\r
335                                 return;\r
336                         }\r
337                 }\r
338         }\r
339 \r
340 \r
341         private void indexResourceRTF(Resource r) {\r
342 \r
343                 Data d = r.getData();\r
344                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
345                         d = r.getData();\r
346                 if (d.getSize()== 0)\r
347                         return;\r
348 \r
349                 QTemporaryFile f = writeResource(d);\r
350                 if (!keepRunning) {\r
351                         return;\r
352                 }\r
353                 \r
354                 InputStream input;\r
355                 try {\r
356                         input = new FileInputStream(new File(f.fileName()));\r
357                         ContentHandler textHandler = new BodyContentHandler(-1);\r
358                         Metadata metadata = new Metadata();\r
359                         RTFParser parser = new RTFParser();     \r
360                         ParseContext context = new ParseContext();\r
361                         parser.parse(input, textHandler, metadata, context);\r
362                         String[] result = textHandler.toString().split(regex);\r
363                         for (int i=0; i<result.length && keepRunning; i++) {\r
364                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
365                         }\r
366                         input.close();\r
367                 \r
368                         f.close();\r
369                 } catch (java.lang.ClassCastException e) {\r
370                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
371                 } catch (FileNotFoundException e) {\r
372                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
373                 } catch (IOException e) {\r
374                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
375                 } catch (SAXException e) {\r
376                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
377                 } catch (TikaException e) {\r
378                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
379                 } catch (Exception e) {\r
380                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
381                 } catch (java.lang.NoSuchMethodError e) {\r
382                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
383                 } catch (Error e) {\r
384                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
385                 }\r
386         }\r
387 \r
388         \r
389         private void indexResourceODF(Resource r) {\r
390 \r
391                 Data d = r.getData();\r
392                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
393                         d = r.getData();\r
394                 if (d.getSize()== 0)\r
395                         return;\r
396                 QTemporaryFile f = writeResource(d);\r
397                 if (!keepRunning) {\r
398                         return;\r
399                 }\r
400                 \r
401                 InputStream input;\r
402                 try {\r
403                         input = new FileInputStream(new File(f.fileName()));\r
404                         ContentHandler textHandler = new BodyContentHandler(-1);\r
405                         Metadata metadata = new Metadata();\r
406                         OpenDocumentParser parser = new OpenDocumentParser();   \r
407                         ParseContext context = new ParseContext();\r
408                         parser.parse(input, textHandler, metadata, context);\r
409                         String[] result = textHandler.toString().split(regex);\r
410                         for (int i=0; i<result.length && keepRunning; i++) {\r
411                                 if (interrupt) {\r
412                                         processInterrupt();\r
413                                 }\r
414                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
415                         }\r
416                         input.close();\r
417                 \r
418                         f.close();\r
419                 } catch (java.lang.ClassCastException e) {\r
420                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
421                 } catch (FileNotFoundException e) {\r
422                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
423                 } catch (IOException e) {\r
424                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
425                 } catch (SAXException e) {\r
426                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
427                 } catch (TikaException e) {\r
428                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
429                 } catch (Exception e) {\r
430                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
431                 } catch (java.lang.NoSuchMethodError e) {\r
432                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
433                 } catch (Error e) {\r
434                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
435                 }\r
436         }\r
437 \r
438         \r
439         private void indexResourceOffice(Resource r) {\r
440 \r
441                 Data d = r.getData();\r
442                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
443                         d = r.getData();\r
444                 if (d.getSize()== 0)\r
445                         return;\r
446                 QTemporaryFile f = writeResource(d);\r
447                 if (!keepRunning) {\r
448                         return;\r
449                 }\r
450                 \r
451                 InputStream input;\r
452                 try {\r
453                         input = new FileInputStream(new File(f.fileName()));\r
454                         ContentHandler textHandler = new BodyContentHandler(-1);\r
455                         Metadata metadata = new Metadata();\r
456                         OfficeParser parser = new OfficeParser();       \r
457                         ParseContext context = new ParseContext();\r
458                         parser.parse(input, textHandler, metadata, context);\r
459                         String[] result = textHandler.toString().split(regex);\r
460                         for (int i=0; i<result.length && keepRunning; i++) {\r
461                                 if (interrupt) {\r
462                                         processInterrupt();\r
463                                 }\r
464                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
465                         }\r
466                         input.close();\r
467                 \r
468                         f.close();\r
469                 } catch (java.lang.ClassCastException e) {\r
470                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
471                 } catch (FileNotFoundException e) {\r
472                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
473                 } catch (IOException e) {\r
474                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
475                 } catch (SAXException e) {\r
476                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
477                 } catch (TikaException e) {\r
478                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
479                 } catch (Exception e) {\r
480                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
481                 } catch (java.lang.NoSuchMethodError e) {\r
482                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
483                 } catch (Error e) {\r
484                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
485                 }\r
486         }\r
487 \r
488         \r
489         \r
490         private void indexResourcePDF(Resource r) {\r
491 \r
492                 Data d = r.getData();\r
493                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
494                         d = r.getData();\r
495                 if (d.getSize()== 0)\r
496                         return;\r
497                 QTemporaryFile f = writeResource(d);\r
498                 if (!keepRunning) {\r
499                         return;\r
500                 }\r
501                 \r
502                 InputStream input;\r
503                 try {                   \r
504                         input = new FileInputStream(new File(f.fileName()));\r
505                         ContentHandler textHandler = new BodyContentHandler(-1);\r
506                         Metadata metadata = new Metadata();\r
507                         PDFParser parser = new PDFParser();     \r
508                         ParseContext context = new ParseContext();\r
509                         parser.parse(input, textHandler, metadata, context);\r
510                         String[] result = textHandler.toString().split(regex);\r
511                         for (int i=0; i<result.length && keepRunning; i++) {\r
512                                 if (interrupt) {\r
513                                         processInterrupt();\r
514                                 }\r
515                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
516                         }\r
517                         input.close();\r
518                 \r
519                         f.close();\r
520                 } catch (java.lang.ClassCastException e) {\r
521                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
522                 } catch (FileNotFoundException e) {\r
523                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
524                 } catch (IOException e) {\r
525                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
526                 } catch (SAXException e) {\r
527                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
528                 } catch (TikaException e) {\r
529                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
530                 } catch (Exception e) {\r
531                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
532                 } catch (java.lang.NoSuchMethodError e) {\r
533                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
534                 } catch (Error e) {\r
535                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());\r
536                 }\r
537         }\r
538         \r
539         \r
540         private void indexResourceOOXML(Resource r) {\r
541 \r
542                 Data d = r.getData();\r
543                 for (int i=0; i<20 && d.getSize() == 0; i++)\r
544                         d = r.getData();\r
545                 if (d.getSize()== 0)\r
546                         return;\r
547                 QTemporaryFile f = writeResource(d);\r
548                 if (!keepRunning) {\r
549                         return;\r
550                 }\r
551                 \r
552                 InputStream input;\r
553                 try {\r
554                         input = new FileInputStream(new File(f.fileName()));\r
555                         ContentHandler textHandler = new BodyContentHandler(-1);\r
556                         Metadata metadata = new Metadata();\r
557                         OOXMLParser parser = new OOXMLParser(); \r
558                         ParseContext context = new ParseContext();\r
559                         parser.parse(input, textHandler, metadata, context);\r
560                         String[] result = textHandler.toString().split(regex);\r
561                         for (int i=0; i<result.length && keepRunning; i++) {\r
562                                 if (interrupt) {\r
563                                         processInterrupt();\r
564                                 }\r
565                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
566                         }\r
567                         input.close();\r
568                 \r
569                         f.close();\r
570                 } catch (java.lang.ClassCastException e) {\r
571                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
572                 } catch (FileNotFoundException e) {\r
573                         logger.log(logger.LOW, "FileNotFound  exception: " +e.getMessage());\r
574                 } catch (IOException e) {\r
575                         logger.log(logger.LOW, "IO  exception: " +e.getMessage());\r
576                 } catch (SAXException e) {\r
577                         logger.log(logger.LOW, "SAX  exception: " +e.getMessage());\r
578                 } catch (TikaException e) {\r
579                         logger.log(logger.LOW, "Tika  exception: " +e.getMessage());\r
580                 } catch (Exception e) {\r
581                         logger.log(logger.LOW, "Unknown  exception: " +e.getMessage());\r
582                 } catch (java.lang.NoSuchMethodError e) {\r
583                         logger.log(logger.LOW, "NoSuchMethod error: " +e.getMessage());\r
584                 } catch (Error e) {\r
585                         logger.log(logger.LOW, "Unknown error: " +e.getMessage());              }\r
586         }\r
587         \r
588 \r
589         \r
590         private QTemporaryFile writeResource(Data d) {\r
591                 QTemporaryFile newFile = new QTemporaryFile();\r
592                 newFile.open(OpenModeFlag.WriteOnly);\r
593                 newFile.write(d.getBody());\r
594                 newFile.close();\r
595                 return newFile;\r
596         } \r
597 \r
598         \r
599         private String removeEnCrypt(String content) {\r
600                 int index = content.indexOf("<en-crypt");\r
601                 int endPos;\r
602                 boolean tagFound = true;\r
603                 while (tagFound && keepRunning) {\r
604                         if (interrupt) {\r
605                                 processInterrupt();\r
606                         }\r
607                         endPos = content.indexOf("</en-crypt>", index)+11;\r
608                         if (endPos > -1 && index > -1) {\r
609                                 content = content.substring(0,index)+content.substring(endPos);\r
610                                 index = content.indexOf("<en-crypt");\r
611                         } else {\r
612                                 tagFound = false;\r
613                         }\r
614                 }\r
615                 return content;\r
616         }\r
617 \r
618         \r
619         private void addToIndex(String guid, String word, String type) {\r
620                 if (foundWords.contains(word))\r
621                         return;\r
622                 StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
623                 for (int i=buffer.length()-1; i>=0; i--) {\r
624                         if (!Character.isLetterOrDigit(buffer.charAt(i)) && specialIndexCharacters.indexOf(buffer.charAt(i)) == -1)\r
625                                 buffer.deleteCharAt(i);\r
626                         else\r
627                                 break;\r
628                 }\r
629                 buffer = buffer.reverse();\r
630                 for (int i=buffer.length()-1; i>=0; i--) {\r
631                         if (!Character.isLetterOrDigit(buffer.charAt(i)))\r
632                                 buffer.deleteCharAt(i);\r
633                         else\r
634                                 break;\r
635                 }\r
636                 buffer = buffer.reverse();\r
637                 if (buffer.length() > 0) {\r
638                         // We have a good word, now let's trim off junk at the beginning or end\r
639                         if (!foundWords.contains(buffer.toString())) {\r
640                                 foundWords.add(buffer.toString());\r
641                                 foundWords.add(word);\r
642                                 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
643                                 uncommittedCount++;\r
644                                 if (uncommittedCount > 100) {\r
645                                         conn.commitTransaction();\r
646                                         uncommittedCount=0;\r
647                                 }\r
648                         }\r
649                 }\r
650                 return;\r
651         }\r
652         \r
653         private void scanUnindexed() {\r
654                 List<String> notes = conn.getNoteTable().getUnindexed();\r
655                 guid = null;\r
656                 boolean started = false;\r
657                 if (notes.size() > 0) {\r
658                         signal.indexStarted.emit();\r
659                         started = true;\r
660                 }\r
661                 for (int i=0; i<notes.size() && keepRunning; i++) {\r
662                         if (interrupt) {\r
663                                 processInterrupt();\r
664                         }\r
665                         guid = notes.get(i);\r
666                         if (guid != null && keepRunning) {\r
667                                 indexNoteContent();\r
668                         }\r
669                 }\r
670                 \r
671                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
672                 if (unindexedResources.size() > 0 && !started) {\r
673                         signal.indexStarted.emit();\r
674                         started = true;\r
675                 }\r
676                 for (int i=0; i<unindexedResources.size()&& keepRunning; i++) {\r
677                         if (interrupt) {\r
678                                 processInterrupt();\r
679                         }\r
680                         guid = unindexedResources.get(i);\r
681                         if (keepRunning) {\r
682                                 indexResource();\r
683                         }\r
684                 }\r
685                 \r
686                 // Cleanup stuff that was deleted at some point\r
687                 List<String> guids = conn.getWordsTable().getGuidList();\r
688                 logger.log(logger.LOW, "GUIDS in index: " +guids.size());\r
689                 for (int i=0; i<guids.size() && keepRunning; i++) {\r
690                         if (!conn.getNoteTable().exists(guids.get(i))) {\r
691                                 logger.log(logger.LOW, "Old GUID found: " +guids.get(i));\r
692                                 conn.getWordsTable().expunge(guids.get(i));\r
693                         }\r
694                 }\r
695                 \r
696                 if (started && keepRunning) \r
697                         signal.indexFinished.emit();\r
698         }\r
699         \r
700         private void reindexNote() {\r
701                 if (guid == null)\r
702                         return;\r
703                 conn.getNoteTable().setIndexNeeded(guid, true);\r
704         }\r
705         \r
706         private void reindexAll() {\r
707                 conn.getNoteTable().reindexAllNotes();\r
708                 conn.getNoteTable().noteResourceTable.reindexAll(); \r
709         }\r
710 \r
711         private void waitSeconds(int len) {\r
712                 long starttime = 0; // variable declared\r
713                 //...\r
714                 // for the first time, remember the timestamp\r
715             starttime = System.currentTimeMillis();\r
716                 // the next timestamp we want to wake up\r
717                 starttime += (1000.0);\r
718                 // Wait until the desired next time arrives using nanosecond\r
719                 // accuracy timer (wait(time) isn't accurate enough on most platforms) \r
720                 LockSupport.parkNanos((Math.max(0, \r
721                     starttime - System.currentTimeMillis()) * 1000000));\r
722         }\r
723         \r
724         private void processInterrupt() {\r
725                 conn.commitTransaction();\r
726                 waitSeconds(1);\r
727                 uncommittedCount = 0;\r
728                 conn.beginTransaction();\r
729                 interrupt = false;\r
730         }\r
731         \r
732 }\r