2 * This file is part of NeverNote
\r
3 * Copyright 2009 Randy Baumgarte
\r
5 * This file may be licensed under the terms of the
\r
6 * GNU General Public License Version 2 (the ``GPL'').
\r
8 * Software distributed under the License is distributed
\r
9 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
\r
10 * express or implied. See the GPL for the specific language
\r
11 * governing rights and limitations.
\r
13 * You should have received a copy of the GPL along with this
\r
14 * program. If not, go to http://www.gnu.org/licenses/gpl.html
\r
15 * or write to the Free Software Foundation, Inc.,
\r
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
\r
20 package cx.fbn.nevernote.threads;
\r
22 import java.util.concurrent.LinkedBlockingQueue;
\r
24 import com.evernote.edam.type.Note;
\r
25 import com.evernote.edam.type.Resource;
\r
26 import com.trolltech.qt.core.QByteArray;
\r
27 import com.trolltech.qt.core.QObject;
\r
28 import com.trolltech.qt.xml.QDomDocument;
\r
29 import com.trolltech.qt.xml.QDomElement;
\r
30 import com.trolltech.qt.xml.QDomNodeList;
\r
32 import cx.fbn.nevernote.Global;
\r
33 import cx.fbn.nevernote.signals.NoteResourceSignal;
\r
34 import cx.fbn.nevernote.signals.NoteSignal;
\r
35 import cx.fbn.nevernote.sql.DatabaseConnection;
\r
36 import cx.fbn.nevernote.utilities.ApplicationLogger;
\r
37 import cx.fbn.nevernote.utilities.StringUtils;
\r
39 //public class IndexRunner implements QRunnable {
\r
40 public class IndexRunner extends QObject implements Runnable {
\r
42 private final ApplicationLogger logger;
\r
43 private String guid;
\r
44 private QByteArray resourceBinary;
\r
45 public volatile NoteSignal noteSignal;
\r
46 public volatile NoteResourceSignal resourceSignal;
\r
47 private int indexType;
\r
48 public final int CONTENT=1;
\r
49 public final int RESOURCE=2;
\r
50 private boolean keepRunning;
\r
51 // public volatile int ID;
\r
52 private final QDomDocument doc;
\r
53 private static String regex = Global.getWordRegex();
\r
54 private final DatabaseConnection conn;
\r
55 private volatile LinkedBlockingQueue<String> workQueue;
\r
56 // private static int MAX_EMPTY_QUEUE_COUNT = 1;
\r
57 private static int MAX_QUEUED_WAITING = 1000;
\r
62 public IndexRunner(String logname, String u, String uid, String pswd, String cpswd) {
\r
63 logger = new ApplicationLogger(logname);
\r
64 conn = new DatabaseConnection(logger, u, uid, pswd, cpswd);
\r
65 noteSignal = new NoteSignal();
\r
66 resourceSignal = new NoteResourceSignal();
\r
67 // threadSignal = new ThreadSignal();
\r
68 indexType = CONTENT;
\r
71 doc = new QDomDocument();
\r
72 workQueue=new LinkedBlockingQueue<String>(MAX_QUEUED_WAITING);
\r
76 public void setIndexType(int t) {
\r
83 thread().setPriority(Thread.MIN_PRIORITY);
\r
84 logger.log(logger.EXTREME, "Starting index thread ");
\r
85 while (keepRunning) {
\r
87 String work = workQueue.take();
\r
88 if (work.startsWith("CONTENT")) {
\r
89 work = work.replace("CONTENT ", "");
\r
91 indexType = CONTENT;
\r
93 if (work.startsWith("RESOURCE")) {
\r
94 work = work.replace("RESOURCE ", "");
\r
96 indexType = RESOURCE;
\r
98 if (work.startsWith("STOP")) {
\r
99 keepRunning = false;
\r
102 if (guid == null || guid.trim().equals("")) {
\r
104 resourceSignal.resourceIndexed.emit("null or empty guid");
\r
106 logger.log(logger.EXTREME, "Type:" +indexType);
\r
107 if (indexType == CONTENT && keepRunning) {
\r
108 logger.log(logger.MEDIUM, "Indexing note: "+guid);
\r
109 indexNoteContent();
\r
112 if (indexType == RESOURCE && keepRunning) {
\r
113 logger.log(logger.MEDIUM, "Indexing resource: "+guid);
\r
117 } catch (InterruptedException e) {
\r
118 // TODO Auto-generated catch block
\r
119 e.printStackTrace();
\r
126 public void indexNoteContent() {
\r
127 logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");
\r
129 logger.log(logger.EXTREME, "Getting note content");
\r
130 Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);
\r
131 String data = n.getContent();
\r
133 logger.log(logger.EXTREME, "Removing any encrypted data");
\r
134 data = removeEnCrypt(data);
\r
135 logger.log(logger.EXTREME, "Removing xml markups");
\r
136 String text = StringUtils.unescapeHTML(data.replaceAll("\\<.*?\\>", ""),0);
\r
138 logger.log(logger.EXTREME, "Splitting words");
\r
139 String[] result = text.toString().split(regex);
\r
140 logger.log(logger.EXTREME, "Deleting existing words for note from index");
\r
141 conn.getWordsTable().expungeFromWordIndex(guid, "CONTENT");
\r
143 logger.log(logger.EXTREME, "Number of words found: " +result.length);
\r
144 for (int j=0; j<result.length && keepRunning; j++) {
\r
145 logger.log(logger.EXTREME, "Result word: " +result[j]);
\r
146 if (result[j].length() > 0) {
\r
147 if (Character.isLetterOrDigit(result[j].charAt(0))) {
\r
148 int len = result[j].length();
\r
149 StringBuffer buffer = new StringBuffer(result[j].toLowerCase());
\r
150 logger.log(logger.EXTREME, "Processing " +buffer);
\r
151 for (int k=len-1; k>=0 && keepRunning; k--) {
\r
152 if (!Character.isLetterOrDigit(result[j].charAt(k)))
\r
153 buffer.deleteCharAt(k);
\r
158 if (buffer.length()>=Global.minimumWordCount) {
\r
159 logger.log(logger.EXTREME, "Adding " +buffer);
\r
160 conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), "CONTENT", 100);
\r
165 // If we were interrupted, we will reindex this note next time
\r
166 if (Global.keepRunning) {
\r
167 logger.log(logger.EXTREME, "Resetting note guid needed");
\r
168 conn.getNoteTable().setIndexNeeded(guid, false);
\r
170 logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");
\r
174 public synchronized boolean addWork(String request) {
\r
175 if (workQueue.size() == 0) {
\r
176 workQueue.offer(request);
\r
182 public synchronized int getWorkQueueSize() {
\r
183 return workQueue.size();
\r
186 public void indexResource() {
\r
191 Resource r = conn.getNoteTable().noteResourceTable.getNoteResourceRecognition(guid);
\r
192 if (r == null || r.getRecognition() == null || r.getRecognition().getBody() == null || r.getRecognition().getBody().length == 0)
\r
193 resourceBinary = new QByteArray(" ");
\r
195 resourceBinary = new QByteArray(r.getRecognition().getBody());
\r
197 conn.getWordsTable().expungeFromWordIndex(guid, "RESOURCE");
\r
199 doc.setContent(resourceBinary);
\r
200 QDomElement docElem = doc.documentElement();
\r
202 // look for text tags
\r
203 QDomNodeList anchors = docElem.elementsByTagName("t");
\r
204 for (int i=0; i<anchors.length() && keepRunning; i++) {
\r
205 QDomElement enmedia = anchors.at(i).toElement();
\r
206 String weight = new String(enmedia.attribute("w"));
\r
207 String text = new String(enmedia.text()).toLowerCase();
\r
208 if (!text.equals("")) {
\r
209 conn.getWordsTable().addWordToNoteIndex(guid, text, "RESOURCE", new Integer(weight));
\r
212 if (Global.keepRunning)
\r
213 conn.getNoteTable().noteResourceTable.setIndexNeeded(guid,false);
\r
217 private String removeEnCrypt(String content) {
\r
218 int index = content.indexOf("<en-crypt");
\r
220 boolean tagFound = true;
\r
221 while (tagFound && keepRunning) {
\r
222 endPos = content.indexOf("</en-crypt>", index)+11;
\r
223 if (endPos > -1 && index > -1) {
\r
224 content = content.substring(0,index)+content.substring(endPos);
\r
225 index = content.indexOf("<en-crypt");
\r