OSDN Git Service

Add selective sync, cleanup debug messages, & correct network sync not disconnecting...
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.List;\r
28 import java.util.concurrent.LinkedBlockingQueue;\r
29 \r
30 import org.apache.commons.lang.StringEscapeUtils;\r
31 import org.apache.tika.exception.TikaException;\r
32 import org.apache.tika.metadata.Metadata;\r
33 import org.apache.tika.parser.ParseContext;\r
34 import org.apache.tika.parser.microsoft.OfficeParser;\r
35 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
36 import org.apache.tika.parser.odf.OpenDocumentParser;\r
37 import org.apache.tika.parser.pdf.PDFParser;\r
38 import org.apache.tika.parser.rtf.RTFParser;\r
39 import org.apache.tika.sax.BodyContentHandler;\r
40 import org.xml.sax.ContentHandler;\r
41 import org.xml.sax.SAXException;\r
42 \r
43 import com.evernote.edam.type.Data;\r
44 import com.evernote.edam.type.Note;\r
45 import com.evernote.edam.type.Resource;\r
46 import com.trolltech.qt.core.QByteArray;\r
47 import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
48 import com.trolltech.qt.core.QObject;\r
49 import com.trolltech.qt.core.QTemporaryFile;\r
50 import com.trolltech.qt.xml.QDomDocument;\r
51 import com.trolltech.qt.xml.QDomElement;\r
52 import com.trolltech.qt.xml.QDomNodeList;\r
53 \r
54 import cx.fbn.nevernote.Global;\r
55 import cx.fbn.nevernote.signals.IndexSignal;\r
56 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
57 import cx.fbn.nevernote.signals.NoteSignal;\r
58 import cx.fbn.nevernote.sql.DatabaseConnection;\r
59 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
60 \r
61 public class IndexRunner extends QObject implements Runnable {\r
62         \r
63         private final ApplicationLogger         logger;\r
64         private String                                          guid;\r
65         private QByteArray                                      resourceBinary;\r
66         public volatile NoteSignal                      noteSignal;\r
67         public volatile NoteResourceSignal      resourceSignal;\r
68         private int                                                     indexType;\r
69         public final int                                        SCAN=1; \r
70         public final int                                        REINDEXALL=2;\r
71         public final int                                        REINDEXNOTE=3;\r
72         public boolean                                          keepRunning;\r
73         private final QDomDocument                      doc;\r
74         private static String                           regex = Global.getWordRegex();\r
75         private final DatabaseConnection        conn;\r
76         private volatile LinkedBlockingQueue<String> workQueue;\r
77         private static int MAX_QUEUED_WAITING = 1000;\r
78         public boolean interrupt;\r
79         public boolean idle;\r
80         public boolean indexAttachmentsLocally = true;\r
81         public volatile IndexSignal                     signal;\r
82 \r
83         \r
84         public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
85                 logger = new ApplicationLogger(logname);\r
86                 conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
87                 indexType = SCAN;\r
88                 guid = null;\r
89                 keepRunning = true;\r
90                 doc = new QDomDocument();\r
91                 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);  \r
92         }\r
93         \r
94         public void setIndexType(int t) {\r
95                 indexType = t;\r
96         }\r
97         \r
98         \r
99         @Override\r
100         public void run() {\r
101                 thread().setPriority(Thread.MIN_PRIORITY);\r
102                 noteSignal = new NoteSignal();\r
103                 resourceSignal = new NoteResourceSignal();\r
104                 signal = new IndexSignal();\r
105                 logger.log(logger.EXTREME, "Starting index thread ");\r
106                 while (keepRunning) {\r
107                         idle=true;\r
108                         try {\r
109                                 String work = workQueue.take();\r
110                                 idle=false;\r
111                                 if (work.startsWith("SCAN")) {\r
112                                         guid=null;\r
113                                         interrupt = false;\r
114                                         indexType = SCAN;\r
115                                 }\r
116                                 if (work.startsWith("REINDEXALL")) {\r
117                                         guid = null;\r
118                                         indexType=REINDEXALL;\r
119                                 }\r
120                                 if (work.startsWith("REINDEXNOTE")) {\r
121                                         work = work.replace("REINDEXNOTE ", "");\r
122                                         guid = work;\r
123                                         indexType = REINDEXNOTE;\r
124                                 }\r
125                                 if (work.startsWith("STOP")) {\r
126                                         keepRunning = false;\r
127                                         guid = null;\r
128                                 }\r
129                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
130                                 if (indexType == SCAN && keepRunning) {\r
131                                         logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
132                                         scanUnindexed();\r
133                                         setIndexType(0);\r
134                                 }\r
135                                 if (indexType == REINDEXALL && keepRunning) {\r
136                                         logger.log(logger.MEDIUM, "Marking all for reindex");\r
137                                         reindexAll();\r
138                                         setIndexType(0);\r
139                                 }\r
140                                 if (indexType == REINDEXNOTE && keepRunning) {\r
141                                         reindexNote();\r
142                                 }\r
143                         } catch (InterruptedException e) {\r
144                                 // TODO Auto-generated catch block\r
145                                 e.printStackTrace();\r
146                         }\r
147                 }\r
148                 logger.log(logger.EXTREME, "Shutting down database");\r
149                 conn.dbShutdown();\r
150                 logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
151         }\r
152         \r
153         // Reindex a note\r
154         public void indexNoteContent() {\r
155                 \r
156                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
157                 \r
158                 logger.log(logger.EXTREME, "Getting note content");\r
159                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
160                 String data = n.getContent();\r
161                 data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
162                 \r
163                 logger.log(logger.EXTREME, "Removing any encrypted data");\r
164                 data = removeEnCrypt(data.toString());\r
165                 logger.log(logger.EXTREME, "Removing xml markups");\r
166                 String text =  removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
167                 n.getTitle());\r
168                                 \r
169                 logger.log(logger.EXTREME, "Splitting words");\r
170                 String[] result = text.toString().split(regex);\r
171                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
172                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
173                 \r
174                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
175                 for (int j=0; j<result.length && keepRunning; j++) {\r
176                         if (!result[j].trim().equals("")) {\r
177                                 logger.log(logger.EXTREME, "Result word: " +result[j]);\r
178                                 addToIndex(guid, result[j], "CONTENT");\r
179                         }\r
180                 }\r
181                 // If we were interrupted, we will reindex this note next time\r
182                 if (Global.keepRunning) {\r
183                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
184                         conn.getNoteTable().setIndexNeeded(guid, false);\r
185                 }\r
186                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
187         }\r
188         \r
189         \r
190         private String removeTags(String text) {\r
191                 StringBuffer buffer = new StringBuffer(text);\r
192                 boolean inTag = false;\r
193                 for (int i=buffer.length()-1; i>=0; i--) {\r
194                         if (buffer.charAt(i) == '>')\r
195                                 inTag = true;\r
196                         if (buffer.charAt(i) == '<')\r
197                                 inTag = false;\r
198                         if (inTag || buffer.charAt(i) == '<')\r
199                                 buffer.deleteCharAt(i);\r
200                 }\r
201                 \r
202                 return buffer.toString();\r
203         }\r
204 \r
205         \r
206         public synchronized boolean addWork(String request) {\r
207                 if (workQueue.size() == 0) {\r
208                         workQueue.offer(request);\r
209                         return true;\r
210                 }\r
211                 return false;\r
212         }\r
213         \r
214         public synchronized int getWorkQueueSize() {\r
215                 return workQueue.size();\r
216         }\r
217         \r
218         public void indexResource() {\r
219                 \r
220                 if (guid == null)\r
221                         return;\r
222                 \r
223                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
224                 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
225                         resourceBinary = new QByteArray(" ");\r
226                 else\r
227                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
228                 \r
229                 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
230                 // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
231                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
232                         \r
233                 doc.setContent(resourceBinary);\r
234                 QDomElement docElem = doc.documentElement();\r
235                         \r
236                 // look for text tags\r
237                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
238                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
239                         QDomElement enmedia = anchors.at(i).toElement();\r
240                         String weight = new String(enmedia.attribute("w"));\r
241                         String text = new String(enmedia.text()).toLowerCase();\r
242                         if (!text.equals("")) {\r
243                                 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
244                         }\r
245                 }\r
246                 \r
247                 if (Global.keepRunning && indexAttachmentsLocally) {\r
248                         indexResourceContent(guid);\r
249                 }\r
250                                 \r
251                 if (Global.keepRunning)\r
252                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
253         }\r
254         \r
255         private void indexResourceContent(String guid) {\r
256                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
257                 if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
258                         indexResourcePDF(r);\r
259                         return;\r
260                 }\r
261                 if (r.getMime().equalsIgnoreCase("application/docx") || \r
262                         r.getMime().equalsIgnoreCase("application/xlsx") || \r
263                         r.getMime().equalsIgnoreCase("application/pptx")) {\r
264                         indexResourceOOXML(r);\r
265                         return;\r
266                 }\r
267                 if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
268                         r.getMime().equalsIgnoreCase("application/ppt") ||\r
269                         r.getMime().equalsIgnoreCase("application/xls") ||\r
270                         r.getMime().equalsIgnoreCase("application/msg") ||\r
271                         r.getMime().equalsIgnoreCase("application/doc")) {\r
272                                 indexResourceOffice(r);\r
273                                 return;\r
274                 }\r
275                 if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
276                                         indexResourceRTF(r);\r
277                                         return;\r
278                 }\r
279                 if (r.getMime().equalsIgnoreCase("application/odf") ||\r
280                         r.getMime().equalsIgnoreCase("application/odt") ||\r
281                         r.getMime().equalsIgnoreCase("application/odp") ||\r
282                         r.getMime().equalsIgnoreCase("application/odg") ||\r
283                         r.getMime().equalsIgnoreCase("application/odb") ||\r
284                         r.getMime().equalsIgnoreCase("application/ods")) {\r
285                         indexResourceODF(r);\r
286                         return;\r
287                 }\r
288         }\r
289 \r
290 \r
291         private void indexResourceRTF(Resource r) {\r
292 \r
293                 QTemporaryFile f = writeResource(r.getData());\r
294                 if (!keepRunning) {\r
295                         return;\r
296                 }\r
297                 \r
298                 InputStream input;\r
299                 try {\r
300                         input = new FileInputStream(new File(f.fileName()));\r
301                         ContentHandler textHandler = new BodyContentHandler(-1);\r
302                         Metadata metadata = new Metadata();\r
303                         RTFParser parser = new RTFParser();     \r
304                         ParseContext context = new ParseContext();\r
305                         parser.parse(input, textHandler, metadata, context);\r
306                         String[] result = textHandler.toString().split(regex);\r
307                         for (int i=0; i<result.length && keepRunning; i++) {\r
308                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
309                         }\r
310                         input.close();\r
311                 \r
312                         f.close();\r
313                 } catch (java.lang.ClassCastException e) {\r
314                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
315                 } catch (FileNotFoundException e) {\r
316                         // TODO Auto-generated catch block\r
317                         e.printStackTrace();\r
318                 } catch (IOException e) {\r
319                         // TODO Auto-generated catch block\r
320                         e.printStackTrace();\r
321                 } catch (SAXException e) {\r
322                         // TODO Auto-generated catch block\r
323                         e.printStackTrace();\r
324                 } catch (TikaException e) {\r
325                         // TODO Auto-generated catch block\r
326                         e.printStackTrace();\r
327                 } catch (Exception e) {\r
328                         e.printStackTrace();\r
329                 }\r
330         }\r
331 \r
332         \r
333         private void indexResourceODF(Resource r) {\r
334 \r
335                 QTemporaryFile f = writeResource(r.getData());\r
336                 if (!keepRunning) {\r
337                         return;\r
338                 }\r
339                 \r
340                 InputStream input;\r
341                 try {\r
342                         input = new FileInputStream(new File(f.fileName()));\r
343                         ContentHandler textHandler = new BodyContentHandler(-1);\r
344                         Metadata metadata = new Metadata();\r
345                         OpenDocumentParser parser = new OpenDocumentParser();   \r
346                         ParseContext context = new ParseContext();\r
347                         parser.parse(input, textHandler, metadata, context);\r
348                         String[] result = textHandler.toString().split(regex);\r
349                         for (int i=0; i<result.length && keepRunning; i++) {\r
350                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
351                         }\r
352                         input.close();\r
353                 \r
354                         f.close();\r
355                 } catch (java.lang.ClassCastException e) {\r
356                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
357                 } catch (FileNotFoundException e) {\r
358                         // TODO Auto-generated catch block\r
359                         e.printStackTrace();\r
360                 } catch (IOException e) {\r
361                         // TODO Auto-generated catch block\r
362                         e.printStackTrace();\r
363                 } catch (SAXException e) {\r
364                         // TODO Auto-generated catch block\r
365                         e.printStackTrace();\r
366                 } catch (TikaException e) {\r
367                         // TODO Auto-generated catch block\r
368                         e.printStackTrace();\r
369                 } catch (Exception e) {\r
370                         e.printStackTrace();\r
371                 }\r
372         }\r
373 \r
374         \r
375         private void indexResourceOffice(Resource r) {\r
376 \r
377                 QTemporaryFile f = writeResource(r.getData());\r
378                 if (!keepRunning) {\r
379                         return;\r
380                 }\r
381                 \r
382                 InputStream input;\r
383                 try {\r
384                         input = new FileInputStream(new File(f.fileName()));\r
385                         ContentHandler textHandler = new BodyContentHandler(-1);\r
386                         Metadata metadata = new Metadata();\r
387                         OfficeParser parser = new OfficeParser();       \r
388                         ParseContext context = new ParseContext();\r
389                         parser.parse(input, textHandler, metadata, context);\r
390                         String[] result = textHandler.toString().split(regex);\r
391                         for (int i=0; i<result.length && keepRunning; i++) {\r
392                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
393                         }\r
394                         input.close();\r
395                 \r
396                         f.close();\r
397                 } catch (java.lang.ClassCastException e) {\r
398                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
399                 } catch (FileNotFoundException e) {\r
400                         // TODO Auto-generated catch block\r
401                         e.printStackTrace();\r
402                 } catch (IOException e) {\r
403                         // TODO Auto-generated catch block\r
404                         e.printStackTrace();\r
405                 } catch (SAXException e) {\r
406                         // TODO Auto-generated catch block\r
407                         e.printStackTrace();\r
408                 } catch (TikaException e) {\r
409                         // TODO Auto-generated catch block\r
410                         e.printStackTrace();\r
411                 } catch (Exception e) {\r
412                         e.printStackTrace();\r
413                 }\r
414         }\r
415 \r
416         \r
417         \r
418         private void indexResourcePDF(Resource r) {\r
419 \r
420                 QTemporaryFile f = writeResource(r.getData());\r
421                 if (!keepRunning) {\r
422                         return;\r
423                 }\r
424                 \r
425                 InputStream input;\r
426                 try {                   \r
427                         input = new FileInputStream(new File(f.fileName()));\r
428                         ContentHandler textHandler = new BodyContentHandler(-1);\r
429                         Metadata metadata = new Metadata();\r
430                         PDFParser parser = new PDFParser();     \r
431                         ParseContext context = new ParseContext();\r
432                         parser.parse(input, textHandler, metadata, context);\r
433                         String[] result = textHandler.toString().split(regex);\r
434                         for (int i=0; i<result.length && keepRunning; i++) {\r
435                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
436                         }\r
437                         input.close();\r
438                 \r
439                         f.close();\r
440                 } catch (java.lang.ClassCastException e) {\r
441                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
442                 } catch (FileNotFoundException e) {\r
443                         e.printStackTrace();\r
444                 } catch (IOException e) {\r
445                         e.printStackTrace();\r
446                 } catch (SAXException e) {\r
447                         e.printStackTrace();\r
448                 } catch (TikaException e) {\r
449                         e.printStackTrace();\r
450                 } catch (Exception e) {\r
451                         e.printStackTrace();\r
452                 }\r
453         }\r
454         \r
455         \r
456         private void indexResourceOOXML(Resource r) {\r
457 \r
458                 QTemporaryFile f = writeResource(r.getData());\r
459                 if (!keepRunning) {\r
460                         return;\r
461                 }\r
462                 \r
463                 InputStream input;\r
464                 try {\r
465                         input = new FileInputStream(new File(f.fileName()));\r
466                         ContentHandler textHandler = new BodyContentHandler(-1);\r
467                         Metadata metadata = new Metadata();\r
468                         OOXMLParser parser = new OOXMLParser(); \r
469                         ParseContext context = new ParseContext();\r
470                         parser.parse(input, textHandler, metadata, context);\r
471                         String[] result = textHandler.toString().split(regex);\r
472                         for (int i=0; i<result.length && keepRunning; i++) {\r
473                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
474                         }\r
475                         input.close();\r
476                 \r
477                         f.close();\r
478                 } catch (java.lang.ClassCastException e) {\r
479                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
480                 } catch (FileNotFoundException e) {\r
481                         // TODO Auto-generated catch block\r
482                         e.printStackTrace();\r
483                 } catch (IOException e) {\r
484                         // TODO Auto-generated catch block\r
485                         e.printStackTrace();\r
486                 } catch (SAXException e) {\r
487                         // TODO Auto-generated catch block\r
488                         e.printStackTrace();\r
489                 } catch (TikaException e) {\r
490                         // TODO Auto-generated catch block\r
491                         e.printStackTrace();\r
492                 } catch (Exception e) {\r
493                         e.printStackTrace();\r
494                 }\r
495         }\r
496         \r
497 \r
498         \r
499         private QTemporaryFile writeResource(Data d) {\r
500                 QTemporaryFile newFile = new QTemporaryFile();\r
501                 newFile.open(OpenModeFlag.WriteOnly);\r
502                 newFile.write(d.getBody());\r
503                 newFile.close();\r
504                 return newFile;\r
505         }\r
506 \r
507         \r
508         private String removeEnCrypt(String content) {\r
509                 int index = content.indexOf("<en-crypt");\r
510                 int endPos;\r
511                 boolean tagFound = true;\r
512                 while (tagFound && keepRunning) {\r
513                         endPos = content.indexOf("</en-crypt>", index)+11;\r
514                         if (endPos > -1 && index > -1) {\r
515                                 content = content.substring(0,index)+content.substring(endPos);\r
516                                 index = content.indexOf("<en-crypt");\r
517                         } else {\r
518                                 tagFound = false;\r
519                         }\r
520                 }\r
521                 return content;\r
522         }\r
523 \r
524         \r
525         private void addToIndex(String guid, String word, String type) {\r
526                 if (word.length() > 0) {\r
527                         // We have a good word, now let's trim off junk at the beginning or end\r
528                         StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
529                         conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
530                 }\r
531                 return;\r
532         }\r
533         \r
534         private void scanUnindexed() {\r
535                 List<String> notes = conn.getNoteTable().getUnindexed();\r
536                 guid = null;\r
537                 boolean started = false;\r
538                 if (notes.size() > 0) {\r
539                         signal.indexStarted.emit();\r
540                         started = true;\r
541                 }\r
542                 for (int i=0; i<notes.size() && !interrupt && keepRunning; i++) {\r
543                         guid = notes.get(i);\r
544                         if (guid != null && keepRunning) {\r
545                                 indexNoteContent();\r
546                         }\r
547                 }\r
548                 \r
549                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
550                 if (unindexedResources.size() > 0 && !started) {\r
551                         signal.indexStarted.emit();\r
552                         started = true;\r
553                 }\r
554                 for (int i=0; i<unindexedResources.size()&& !interrupt && keepRunning; i++) {\r
555                         guid = unindexedResources.get(i);\r
556                         if (keepRunning) {\r
557                                 indexResource();\r
558                         }\r
559                 }\r
560                 if (started && keepRunning && !interrupt) \r
561                         signal.indexFinished.emit();\r
562         }\r
563         \r
564         private void reindexNote() {\r
565                 if (guid == null)\r
566                         return;\r
567                 conn.getNoteTable().setIndexNeeded(guid, true);\r
568         }\r
569         \r
570         private void reindexAll() {\r
571                 conn.getNoteTable().reindexAllNotes();\r
572                 conn.getNoteTable().noteResourceTable.reindexAll(); \r
573         }\r
574 \r
575 }\r