OSDN Git Service

There are multiple changes with this commit.
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / threads / IndexRunner.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 package cx.fbn.nevernote.threads;\r
21 \r
22 import java.util.concurrent.LinkedBlockingQueue;\r
23 \r
24 import com.evernote.edam.type.Note;\r
25 import com.evernote.edam.type.Resource;\r
26 import com.trolltech.qt.core.QByteArray;\r
27 import com.trolltech.qt.core.QObject;\r
28 import com.trolltech.qt.xml.QDomDocument;\r
29 import com.trolltech.qt.xml.QDomElement;\r
30 import com.trolltech.qt.xml.QDomNodeList;\r
31 \r
32 import cx.fbn.nevernote.Global;\r
33 import cx.fbn.nevernote.signals.NoteResourceSignal;\r
34 import cx.fbn.nevernote.signals.NoteSignal;\r
35 import cx.fbn.nevernote.sql.DatabaseConnection;\r
36 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
37 import cx.fbn.nevernote.utilities.StringUtils;\r
38 \r
39 //public class IndexRunner implements QRunnable {\r
40 public class IndexRunner extends QObject implements Runnable {\r
41         \r
42         private final ApplicationLogger         logger;\r
43         private String                                          guid;\r
44         private QByteArray                                      resourceBinary;\r
45         public volatile NoteSignal                      noteSignal;\r
46         public volatile NoteResourceSignal      resourceSignal;\r
47         private int                                                     indexType;\r
48         public final int                                        CONTENT=1; \r
49         public final int                                        RESOURCE=2;\r
50         private boolean                                         keepRunning;\r
51 //      public volatile int                                     ID;\r
52         private final QDomDocument                      doc;\r
53         private static String                           regex = Global.getWordRegex();\r
54         private final DatabaseConnection        conn;\r
55         private volatile LinkedBlockingQueue<String> workQueue;\r
56 //      private static int MAX_EMPTY_QUEUE_COUNT = 1;\r
57         private static int MAX_QUEUED_WAITING = 1000;\r
58 \r
59         \r
60 \r
61         \r
62         public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {\r
63                 logger = new ApplicationLogger(logname);\r
64                 conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);\r
65                 noteSignal = new NoteSignal();\r
66                 resourceSignal = new NoteResourceSignal();\r
67 //              threadSignal = new ThreadSignal();\r
68                 indexType = CONTENT;\r
69                 guid = null;\r
70                 keepRunning = true;\r
71                 doc = new QDomDocument();\r
72                 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);\r
73         }\r
74         \r
75         \r
76         public void setIndexType(int t) {\r
77                 indexType = t;\r
78         }\r
79         \r
80         \r
81         @Override\r
82         public void run() {\r
83                 thread().setPriority(Thread.MIN_PRIORITY);\r
84                 logger.log(logger.EXTREME, "Starting index thread ");\r
85                 while (keepRunning) {\r
86                         try {\r
87                                 String work = workQueue.take();\r
88                                 if (work.startsWith("CONTENT")) {\r
89                                         work = work.replace("CONTENT ", "");\r
90                                         guid = work;\r
91                                         indexType = CONTENT;\r
92                                 }\r
93                                 if (work.startsWith("RESOURCE")) {\r
94                                         work = work.replace("RESOURCE ", "");\r
95                                         guid = work;\r
96                                         indexType = RESOURCE;\r
97                                 }\r
98                                 if (work.startsWith("STOP")) {\r
99                                         keepRunning = false;\r
100                                         guid = work;\r
101                                 }\r
102                                 if (guid == null || guid.trim().equals("")) {\r
103                                         setIndexType(0);\r
104                                         resourceSignal.resourceIndexed.emit("null or empty guid");\r
105                                 }\r
106                                 logger.log(logger.EXTREME, "Type:" +indexType);\r
107                                 if (indexType == CONTENT && keepRunning) {\r
108                                         logger.log(logger.MEDIUM, "Indexing note: "+guid);\r
109                                         indexNoteContent();\r
110                                         setIndexType(0);\r
111                                 }\r
112                                 if (indexType == RESOURCE && keepRunning) {\r
113                                         logger.log(logger.MEDIUM, "Indexing resource: "+guid);\r
114                                         indexResource();\r
115                                         setIndexType(0);\r
116                                 }\r
117                         } catch (InterruptedException e) {\r
118                                 // TODO Auto-generated catch block\r
119                                 e.printStackTrace();\r
120                         }\r
121                 }\r
122                 conn.dbShutdown();\r
123         }\r
124         \r
125         // Reindex a note\r
126         public void indexNoteContent() {\r
127                 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
128                 \r
129                 logger.log(logger.EXTREME, "Getting note content");\r
130                 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
131                 String data = n.getContent();\r
132                 \r
133                 logger.log(logger.EXTREME, "Removing any encrypted data");\r
134                 data = removeEnCrypt(data);\r
135                 logger.log(logger.EXTREME, "Removing xml markups");\r
136                 String text = StringUtils.unescapeHTML(data.replaceAll("\\<.*?\\>", ""),0);\r
137                 \r
138                 logger.log(logger.EXTREME, "Splitting words");\r
139                 String[] result = text.toString().split(regex);\r
140                 logger.log(logger.EXTREME, "Deleting existing words for note from index");\r
141                 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");\r
142                 \r
143                 logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
144                 for (int j=0; j<result.length && keepRunning; j++) {\r
145                         logger.log(logger.EXTREME, "Result word: " +result[j]);\r
146                         if (result[j].length() > 0) {\r
147                                 if (Character.isLetterOrDigit(result[j].charAt(0))) {\r
148                                         int len = result[j].length();\r
149                                         StringBuffer buffer = new StringBuffer(result[j].toLowerCase());\r
150                                         logger.log(logger.EXTREME, "Processing " +buffer);\r
151                                         for (int k=len-1; k>=0 && keepRunning; k--) {\r
152                                                 if (!Character.isLetterOrDigit(result[j].charAt(k)))\r
153                                                         buffer.deleteCharAt(k);\r
154                                                 else\r
155                                                         k=-1;\r
156                                         }\r
157 \r
158                                         if (buffer.length()>=Global.minimumWordCount) {\r
159                                                 logger.log(logger.EXTREME, "Adding " +buffer);\r
160                                                 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), "CONTENT", 100);\r
161                                         }\r
162                                 }\r
163                         }\r
164                 }\r
165                 // If we were interrupted, we will reindex this note next time\r
166                 if (Global.keepRunning) {\r
167                         logger.log(logger.EXTREME, "Resetting note guid needed");\r
168                         conn.getNoteTable().setIndexNeeded(guid, false);\r
169                 }\r
170                 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
171         }\r
172 \r
173         \r
174         public synchronized boolean addWork(String request) {\r
175                 if (workQueue.size() == 0) {\r
176                         workQueue.offer(request);\r
177                         return true;\r
178                 }\r
179                 return false;\r
180         }\r
181         \r
182         public synchronized int getWorkQueueSize() {\r
183                 return workQueue.size();\r
184         }\r
185         \r
186         public void indexResource() {\r
187                 \r
188                 if (guid == null)\r
189                         return;\r
190                 \r
191                 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);\r
192                 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0) \r
193                         resourceBinary = new QByteArray(" ");\r
194                 else\r
195                         resourceBinary = new QByteArray(r.getRecognition().getBody());\r
196                 \r
197                 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");\r
198                         \r
199                 doc.setContent(resourceBinary);\r
200                 QDomElement docElem = doc.documentElement();\r
201                         \r
202                 // look for text tags\r
203                 QDomNodeList anchors = docElem.elementsByTagName("t");\r
204                 for (int i=0; i<anchors.length() && keepRunning; i++) {\r
205                         QDomElement enmedia = anchors.at(i).toElement();\r
206                         String weight = new String(enmedia.attribute("w"));\r
207                         String text = new String(enmedia.text()).toLowerCase();\r
208                         if (!text.equals("")) {\r
209                                 conn.getWordsTable().addWordToNoteIndex(guid, text, "RESOURCE", new Integer(weight));\r
210                         }\r
211                 }\r
212                 if (Global.keepRunning)\r
213                         conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);\r
214         }\r
215 \r
216         \r
217         private String removeEnCrypt(String content) {\r
218                 int index = content.indexOf("<en-crypt");\r
219                 int endPos;\r
220                 boolean tagFound = true;\r
221                 while (tagFound && keepRunning) {\r
222                         endPos = content.indexOf("</en-crypt>", index)+11;\r
223                         if (endPos > -1 && index > -1) {\r
224                                 content = content.substring(0,index)+content.substring(endPos);\r
225                                 index = content.indexOf("<en-crypt");\r
226                         } else {\r
227                                 tagFound = false;\r
228                         }\r
229                 }\r
230                 return content;\r
231         }\r
232 \r
233         \r
234         \r
235         \r
236 \r
237 }\r