OSDN Git Service

Correct external editor not setting notebook list properly.
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.List;\r
28 import java.util.concurrent.LinkedBlockingQueue;\r
29 \r
30 import org.apache.commons.lang.StringEscapeUtils;\r
31 import org.apache.tika.exception.TikaException;\r
32 import org.apache.tika.metadata.Metadata;\r
33 import org.apache.tika.parser.ParseContext;\r
34 import org.apache.tika.parser.microsoft.OfficeParser;\r
35 import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;\r
36 import org.apache.tika.parser.odf.OpenDocumentParser;\r
37 import org.apache.tika.parser.pdf.PDFParser;\r
38 import org.apache.tika.parser.rtf.RTFParser;\r
39 import org.apache.tika.sax.BodyContentHandler;\r
40 import org.xml.sax.ContentHandler;\r
41 import org.xml.sax.SAXException;\r
42 \r
43 import com.evernote.edam.type.Data;\r
44 import com.evernote.edam.type.Note;\r
45 import com.evernote.edam.type.Resource;\r
46 import com.trolltech.qt.core.QByteArray;\r
47 import com.trolltech.qt.core.QIODevice.OpenModeFlag;\r
48 import com.trolltech.qt.core.QObject;\r
49 import com.trolltech.qt.core.QTemporaryFile;\r
50 import com.trolltech.qt.xml.QDomDocument;\r
51 import com.trolltech.qt.xml.QDomElement;\r
52 import com.trolltech.qt.xml.QDomNodeList;\r
53 \r
54 import cx.fbn.nevernote.Global;\r
55 import cx.fbn.nevernote.signals.IndexSignal;\r
56 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
57 import cx.fbn.nevernote.signals.NoteSignal;\r
58 import cx.fbn.nevernote.sql.DatabaseConnection;\r
59 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
60 \r
61 public class IndexRunner extends QObject implements Runnable {\r
62         \r
63         private final ApplicationLogger         logger;\r
64         private String                                          guid;\r
65         private QByteArray                                      resourceBinary;\r
66         public volatile NoteSignal                      noteSignal;\r
67         public volatile NoteResourceSignal      resourceSignal;\r
68         private int                                                     indexType;\r
69         public final int                                        SCAN=1; \r
70         public final int                                        REINDEXALL=2;\r
71         public final int                                        REINDEXNOTE=3;\r
72         public boolean                                          keepRunning;\r
73         private final QDomDocument                      doc;\r
74         private static String                           regex = Global.getWordRegex();\r
75         private final DatabaseConnection        conn;\r
76         private volatile LinkedBlockingQueue<String> workQueue;\r
77         private static int MAX_QUEUED_WAITING = 1000;\r
78         public boolean interrupt;\r
79         public boolean idle;\r
80         public boolean indexAttachmentsLocally = true;\r
81         public volatile IndexSignal                     signal;\r
82 \r
83         \r
84         public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
85                 logger = new ApplicationLogger(logname);\r
86                 conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
87                 indexType = SCAN;\r
88                 guid = null;\r
89                 keepRunning = true;\r
90                 doc = new QDomDocument();\r
91                 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);  \r
92         }\r
93         \r
94         public void setIndexType(int t) {\r
95                 indexType = t;\r
96         }\r
97         \r
98         \r
99         @Override\r
100         public void run() {\r
101                 thread().setPriority(Thread.MIN_PRIORITY);\r
102                 noteSignal = new NoteSignal();\r
103                 resourceSignal = new NoteResourceSignal();\r
104                 signal = new IndexSignal();\r
105                 logger.log(logger.EXTREME, "Starting index thread ");\r
106                 while (keepRunning) {\r
107                         idle=true;\r
108                         try {\r
109                                 String work = workQueue.take();\r
110                                 idle=false;\r
111                                 if (work.startsWith("SCAN")) {\r
112                                         guid=null;\r
113                                         interrupt = false;\r
114                                         indexType = SCAN;\r
115                                 }\r
116                                 if (work.startsWith("REINDEXALL")) {\r
117                                         guid = null;\r
118                                         indexType=REINDEXALL;\r
119                                 }\r
120                                 if (work.startsWith("REINDEXNOTE")) {\r
121                                         work = work.replace("REINDEXNOTE ", "");\r
122                                         guid = work;\r
123                                         indexType = REINDEXNOTE;\r
124                                 }\r
125                                 if (work.startsWith("STOP")) {\r
126                                         keepRunning = false;\r
127                                         guid = null;\r
128                                 }\r
129                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
130                                 if (indexType == SCAN && keepRunning) {\r
131                                         logger.log(logger.MEDIUM, "Scanning for unindexed notes & resources");\r
132                                         scanUnindexed();\r
133                                         setIndexType(0);\r
134                                 }\r
135                                 if (indexType == REINDEXALL && keepRunning) {\r
136                                         logger.log(logger.MEDIUM, "Marking all for reindex");\r
137                                         reindexAll();\r
138                                         setIndexType(0);\r
139                                 }\r
140                                 if (indexType == REINDEXNOTE && keepRunning) {\r
141                                         reindexNote();\r
142                                 }\r
143                         } catch (InterruptedException e) {\r
144                                 // TODO Auto-generated catch block\r
145                                 e.printStackTrace();\r
146                         }\r
147                 }\r
148                 logger.log(logger.EXTREME, "Shutting down database");\r
149                 conn.dbShutdown();\r
150                 logger.log(logger.EXTREME, "Database shut down.  Exiting thread");\r
151         }\r
152         \r
153         // Reindex a note\r
154         public void indexNoteContent() {\r
155                 \r
156                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
157                 \r
158                 logger.log(logger.EXTREME, "Getting note content");\r
159                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
160                 String data = n.getContent();\r
161                 data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
162                 System.out.println(data);\r
163                 \r
164                 logger.log(logger.EXTREME, "Removing any encrypted data");\r
165                 data = removeEnCrypt(data.toString());\r
166                 logger.log(logger.EXTREME, "Removing xml markups");\r
167                 String text =  removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
168                 n.getTitle());\r
169                                 \r
170                 logger.log(logger.EXTREME, "Splitting words");\r
171                 String[] result = text.toString().split(regex);\r
172                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
173                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
174                 \r
175                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
176                 for (int j=0; j<result.length && keepRunning; j++) {\r
177                         if (!result[j].trim().equals("")) {\r
178                                 logger.log(logger.EXTREME, "Result word: " +result[j]);\r
179                                 addToIndex(guid, result[j], "CONTENT");\r
180                         }\r
181                 }\r
182                 // If we were interrupted, we will reindex this note next time\r
183                 if (Global.keepRunning) {\r
184                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
185                         conn.getNoteTable().setIndexNeeded(guid, false);\r
186                 }\r
187                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
188         }\r
189         \r
190         \r
191         private String removeTags(String text) {\r
192                 StringBuffer buffer = new StringBuffer(text);\r
193                 boolean inTag = false;\r
194                 for (int i=buffer.length()-1; i>=0; i--) {\r
195                         if (buffer.charAt(i) == '>')\r
196                                 inTag = true;\r
197                         if (buffer.charAt(i) == '<')\r
198                                 inTag = false;\r
199                         if (inTag || buffer.charAt(i) == '<')\r
200                                 buffer.deleteCharAt(i);\r
201                 }\r
202                 \r
203                 return buffer.toString();\r
204         }\r
205 \r
206         \r
207         public synchronized boolean addWork(String request) {\r
208                 if (workQueue.size() == 0) {\r
209                         workQueue.offer(request);\r
210                         return true;\r
211                 }\r
212                 return false;\r
213         }\r
214         \r
215         public synchronized int getWorkQueueSize() {\r
216                 return workQueue.size();\r
217         }\r
218         \r
219         public void indexResource() {\r
220                 \r
221                 if (guid == null)\r
222                         return;\r
223                 \r
224                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
225                 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
226                         resourceBinary = new QByteArray(" ");\r
227                 else\r
228                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
229                 \r
230                 conn.getWordsTable().expungeFromWordIndex(r.getNoteGuid(), "RESOURCE");\r
231                 // This is due to an old bug & can be removed at some point in the future 11/23/2010\r
232                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");   \r
233                         \r
234                 doc.setContent(resourceBinary);\r
235                 QDomElement docElem = doc.documentElement();\r
236                         \r
237                 // look for text tags\r
238                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
239                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
240                         QDomElement enmedia = anchors.at(i).toElement();\r
241                         String weight = new String(enmedia.attribute("w"));\r
242                         String text = new String(enmedia.text()).toLowerCase();\r
243                         if (!text.equals("")) {\r
244                                 conn.getWordsTable().addWordToNoteIndex(r.getNoteGuid(), text, "RESOURCE", new Integer(weight));\r
245                         }\r
246                 }\r
247                 \r
248                 if (Global.keepRunning && indexAttachmentsLocally) {\r
249                         indexResourceContent(guid);\r
250                 }\r
251                                 \r
252                 if (Global.keepRunning)\r
253                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
254         }\r
255         \r
256         private void indexResourceContent(String guid) {\r
257                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResource(guid, true);\r
258                 if (r.getMime().equalsIgnoreCase("application/pdf")) {\r
259                         indexResourcePDF(r);\r
260                         return;\r
261                 }\r
262                 if (r.getMime().equalsIgnoreCase("application/docx") || \r
263                         r.getMime().equalsIgnoreCase("application/xlsx") || \r
264                         r.getMime().equalsIgnoreCase("application/pptx")) {\r
265                         indexResourceOOXML(r);\r
266                         return;\r
267                 }\r
268                 if (r.getMime().equalsIgnoreCase("application/vsd") ||\r
269                         r.getMime().equalsIgnoreCase("application/ppt") ||\r
270                         r.getMime().equalsIgnoreCase("application/xls") ||\r
271                         r.getMime().equalsIgnoreCase("application/msg") ||\r
272                         r.getMime().equalsIgnoreCase("application/doc")) {\r
273                                 indexResourceOffice(r);\r
274                                 return;\r
275                 }\r
276                 if (r.getMime().equalsIgnoreCase("application/rtf")) {\r
277                                         indexResourceRTF(r);\r
278                                         return;\r
279                 }\r
280                 if (r.getMime().equalsIgnoreCase("application/odf") ||\r
281                         r.getMime().equalsIgnoreCase("application/odt") ||\r
282                         r.getMime().equalsIgnoreCase("application/odp") ||\r
283                         r.getMime().equalsIgnoreCase("application/odg") ||\r
284                         r.getMime().equalsIgnoreCase("application/odb") ||\r
285                         r.getMime().equalsIgnoreCase("application/ods")) {\r
286                         indexResourceODF(r);\r
287                         return;\r
288                 }\r
289         }\r
290 \r
291 \r
292         private void indexResourceRTF(Resource r) {\r
293 \r
294                 QTemporaryFile f = writeResource(r.getData());\r
295                 if (!keepRunning) {\r
296                         return;\r
297                 }\r
298                 \r
299                 InputStream input;\r
300                 try {\r
301                         input = new FileInputStream(new File(f.fileName()));\r
302                         ContentHandler textHandler = new BodyContentHandler(-1);\r
303                         Metadata metadata = new Metadata();\r
304                         RTFParser parser = new RTFParser();     \r
305                         ParseContext context = new ParseContext();\r
306                         parser.parse(input, textHandler, metadata, context);\r
307                         String[] result = textHandler.toString().split(regex);\r
308                         for (int i=0; i<result.length && keepRunning; i++) {\r
309                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
310                         }\r
311                         input.close();\r
312                 \r
313                         f.close();\r
314                 } catch (java.lang.ClassCastException e) {\r
315                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
316                 } catch (FileNotFoundException e) {\r
317                         // TODO Auto-generated catch block\r
318                         e.printStackTrace();\r
319                 } catch (IOException e) {\r
320                         // TODO Auto-generated catch block\r
321                         e.printStackTrace();\r
322                 } catch (SAXException e) {\r
323                         // TODO Auto-generated catch block\r
324                         e.printStackTrace();\r
325                 } catch (TikaException e) {\r
326                         // TODO Auto-generated catch block\r
327                         e.printStackTrace();\r
328                 } catch (Exception e) {\r
329                         e.printStackTrace();\r
330                 }\r
331         }\r
332 \r
333         \r
334         private void indexResourceODF(Resource r) {\r
335 \r
336                 QTemporaryFile f = writeResource(r.getData());\r
337                 if (!keepRunning) {\r
338                         return;\r
339                 }\r
340                 \r
341                 InputStream input;\r
342                 try {\r
343                         input = new FileInputStream(new File(f.fileName()));\r
344                         ContentHandler textHandler = new BodyContentHandler(-1);\r
345                         Metadata metadata = new Metadata();\r
346                         OpenDocumentParser parser = new OpenDocumentParser();   \r
347                         ParseContext context = new ParseContext();\r
348                         parser.parse(input, textHandler, metadata, context);\r
349                         String[] result = textHandler.toString().split(regex);\r
350                         for (int i=0; i<result.length && keepRunning; i++) {\r
351                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
352                         }\r
353                         input.close();\r
354                 \r
355                         f.close();\r
356                 } catch (java.lang.ClassCastException e) {\r
357                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
358                 } catch (FileNotFoundException e) {\r
359                         // TODO Auto-generated catch block\r
360                         e.printStackTrace();\r
361                 } catch (IOException e) {\r
362                         // TODO Auto-generated catch block\r
363                         e.printStackTrace();\r
364                 } catch (SAXException e) {\r
365                         // TODO Auto-generated catch block\r
366                         e.printStackTrace();\r
367                 } catch (TikaException e) {\r
368                         // TODO Auto-generated catch block\r
369                         e.printStackTrace();\r
370                 } catch (Exception e) {\r
371                         e.printStackTrace();\r
372                 }\r
373         }\r
374 \r
375         \r
376         private void indexResourceOffice(Resource r) {\r
377 \r
378                 QTemporaryFile f = writeResource(r.getData());\r
379                 if (!keepRunning) {\r
380                         return;\r
381                 }\r
382                 \r
383                 InputStream input;\r
384                 try {\r
385                         input = new FileInputStream(new File(f.fileName()));\r
386                         ContentHandler textHandler = new BodyContentHandler(-1);\r
387                         Metadata metadata = new Metadata();\r
388                         OfficeParser parser = new OfficeParser();       \r
389                         ParseContext context = new ParseContext();\r
390                         parser.parse(input, textHandler, metadata, context);\r
391                         String[] result = textHandler.toString().split(regex);\r
392                         for (int i=0; i<result.length && keepRunning; i++) {\r
393                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
394                         }\r
395                         input.close();\r
396                 \r
397                         f.close();\r
398                 } catch (java.lang.ClassCastException e) {\r
399                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
400                 } catch (FileNotFoundException e) {\r
401                         // TODO Auto-generated catch block\r
402                         e.printStackTrace();\r
403                 } catch (IOException e) {\r
404                         // TODO Auto-generated catch block\r
405                         e.printStackTrace();\r
406                 } catch (SAXException e) {\r
407                         // TODO Auto-generated catch block\r
408                         e.printStackTrace();\r
409                 } catch (TikaException e) {\r
410                         // TODO Auto-generated catch block\r
411                         e.printStackTrace();\r
412                 } catch (Exception e) {\r
413                         e.printStackTrace();\r
414                 }\r
415         }\r
416 \r
417         \r
418         \r
419         private void indexResourcePDF(Resource r) {\r
420 \r
421                 QTemporaryFile f = writeResource(r.getData());\r
422                 if (!keepRunning) {\r
423                         return;\r
424                 }\r
425                 \r
426                 InputStream input;\r
427                 try {                   \r
428                         input = new FileInputStream(new File(f.fileName()));\r
429                         ContentHandler textHandler = new BodyContentHandler(-1);\r
430                         Metadata metadata = new Metadata();\r
431                         PDFParser parser = new PDFParser();     \r
432                         ParseContext context = new ParseContext();\r
433                         parser.parse(input, textHandler, metadata, context);\r
434                         String[] result = textHandler.toString().split(regex);\r
435                         for (int i=0; i<result.length && keepRunning; i++) {\r
436                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
437                         }\r
438                         input.close();\r
439                 \r
440                         f.close();\r
441                 } catch (java.lang.ClassCastException e) {\r
442                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
443                 } catch (FileNotFoundException e) {\r
444                         e.printStackTrace();\r
445                 } catch (IOException e) {\r
446                         e.printStackTrace();\r
447                 } catch (SAXException e) {\r
448                         e.printStackTrace();\r
449                 } catch (TikaException e) {\r
450                         e.printStackTrace();\r
451                 } catch (Exception e) {\r
452                         e.printStackTrace();\r
453                 }\r
454         }\r
455         \r
456         \r
457         private void indexResourceOOXML(Resource r) {\r
458 \r
459                 QTemporaryFile f = writeResource(r.getData());\r
460                 if (!keepRunning) {\r
461                         return;\r
462                 }\r
463                 \r
464                 InputStream input;\r
465                 try {\r
466                         input = new FileInputStream(new File(f.fileName()));\r
467                         ContentHandler textHandler = new BodyContentHandler(-1);\r
468                         Metadata metadata = new Metadata();\r
469                         OOXMLParser parser = new OOXMLParser(); \r
470                         ParseContext context = new ParseContext();\r
471                         parser.parse(input, textHandler, metadata, context);\r
472                         String[] result = textHandler.toString().split(regex);\r
473                         for (int i=0; i<result.length && keepRunning; i++) {\r
474                                 addToIndex(r.getNoteGuid(), result[i], "RESOURCE");\r
475                         }\r
476                         input.close();\r
477                 \r
478                         f.close();\r
479                 } catch (java.lang.ClassCastException e) {\r
480                         logger.log(logger.LOW, "Cast exception: " +e.getMessage());\r
481                 } catch (FileNotFoundException e) {\r
482                         // TODO Auto-generated catch block\r
483                         e.printStackTrace();\r
484                 } catch (IOException e) {\r
485                         // TODO Auto-generated catch block\r
486                         e.printStackTrace();\r
487                 } catch (SAXException e) {\r
488                         // TODO Auto-generated catch block\r
489                         e.printStackTrace();\r
490                 } catch (TikaException e) {\r
491                         // TODO Auto-generated catch block\r
492                         e.printStackTrace();\r
493                 } catch (Exception e) {\r
494                         e.printStackTrace();\r
495                 }\r
496         }\r
497         \r
498 \r
499         \r
500         private QTemporaryFile writeResource(Data d) {\r
501                 QTemporaryFile newFile = new QTemporaryFile();\r
502                 newFile.open(OpenModeFlag.WriteOnly);\r
503                 newFile.write(d.getBody());\r
504                 newFile.close();\r
505                 return newFile;\r
506         }\r
507 \r
508         \r
509         private String removeEnCrypt(String content) {\r
510                 int index = content.indexOf("<en-crypt");\r
511                 int endPos;\r
512                 boolean tagFound = true;\r
513                 while (tagFound && keepRunning) {\r
514                         endPos = content.indexOf("</en-crypt>", index)+11;\r
515                         if (endPos > -1 && index > -1) {\r
516                                 content = content.substring(0,index)+content.substring(endPos);\r
517                                 index = content.indexOf("<en-crypt");\r
518                         } else {\r
519                                 tagFound = false;\r
520                         }\r
521                 }\r
522                 return content;\r
523         }\r
524 \r
525         \r
526         private void addToIndex(String guid, String word, String type) {\r
527                 if (word.length() > 0) {\r
528                         // We have a good word, now let's trim off junk at the beginning or end\r
529                         StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
530                         conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
531                 }\r
532                 return;\r
533         }\r
534         \r
535         private void scanUnindexed() {\r
536                 List<String> notes = conn.getNoteTable().getUnindexed();\r
537                 guid = null;\r
538                 boolean started = false;\r
539                 if (notes.size() > 0) {\r
540                         signal.indexStarted.emit();\r
541                         started = true;\r
542                 }\r
543                 for (int i=0; i<notes.size() && !interrupt && keepRunning; i++) {\r
544                         guid = notes.get(i);\r
545                         if (guid != null && keepRunning) {\r
546                                 indexNoteContent();\r
547                         }\r
548                 }\r
549                 \r
550                 List<String> unindexedResources = conn.getNoteTable().noteResourceTable.getUnindexed();\r
551                 if (unindexedResources.size() > 0 && !started) {\r
552                         signal.indexStarted.emit();\r
553                         started = true;\r
554                 }\r
555                 for (int i=0; i<unindexedResources.size()&& !interrupt && keepRunning; i++) {\r
556                         guid = unindexedResources.get(i);\r
557                         if (keepRunning) {\r
558                                 indexResource();\r
559                         }\r
560                 }\r
561                 if (started && keepRunning && !interrupt) \r
562                         signal.indexFinished.emit();\r
563         }\r
564         \r
565         private void reindexNote() {\r
566                 if (guid == null)\r
567                         return;\r
568                 conn.getNoteTable().setIndexNeeded(guid, true);\r
569         }\r
570         \r
571         private void reindexAll() {\r
572                 conn.getNoteTable().reindexAllNotes();\r
573                 conn.getNoteTable().noteResourceTable.reindexAll(); \r
574         }\r
575 \r
576 }\r