settings.setValue("newNoteWithSelectedTags", "false");\r
settings.endGroup();\r
}\r
- public static void setMinimumWordLength(int len) {\r
- settings.beginGroup("General");\r
- settings.setValue("minimumWordLength", len);\r
- settings.endGroup(); \r
- }\r
- public static int getMinimumWordLength() {\r
- settings.beginGroup("General");\r
- Integer len = 4;\r
- try {\r
- String val = (String)settings.value("minimumWordLength", "4");\r
- len = new Integer(val);\r
- } catch (Exception e) {\r
- try {\r
- len = (Integer)settings.value("minimumWordLength", 4);\r
- } catch (Exception e1) {\r
- len = 4;\r
- }\r
- }\r
- settings.endGroup();\r
- return len;\r
- \r
- }\r
public static void setRecognitionWeight(int len) {\r
settings.beginGroup("General");\r
settings.setValue("recognitionWeight", len);\r
Global.keepRunning = false;
try {
logger.log(logger.MEDIUM, "Waiting for indexThread to stop");
- indexRunner.thread().join(50);
- logger.log(logger.MEDIUM, "Index thread has stopped");
+ if (indexRunner.thread().isAlive())
+ indexRunner.thread().join(50);
+ if (!indexRunner.thread().isAlive())
+ logger.log(logger.MEDIUM, "Index thread has stopped");
+ else
+ logger.log(logger.MEDIUM, "Index thread still running - bypassing");
} catch (InterruptedException e1) {
e1.printStackTrace();
}
Global.userStoreUrl = "https://"+debugPage.getServer()+"/edam/user";\r
Global.setWordRegex(indexPage.getRegex());\r
Global.setRecognitionWeight(indexPage.getRecognitionWeight());\r
- Global.setMinimumWordLength(indexPage.getWordLength());\r
- Global.minimumWordCount = indexPage.getWordLength(); \r
- Global.setIndexThreads(indexPage.getIndexThreads());\r
Global.setIndexThreadSleepInterval(indexPage.getSleepInterval());\r
Global.setMessageLevel( debugPage.getDebugLevel());\r
Global.saveCarriageReturnFix(debugPage.getCarriageReturnFix());\r
appearancePage.setMinimizeOnClose(Global.minimizeOnClose());\r
\r
indexPage.setRegex(Global.getWordRegex());\r
- indexPage.setWordLength(Global.getMinimumWordLength());\r
- indexPage.setIndexThreads(Global.getIndexThreads());\r
indexPage.setSleepInterval(Global.getIndexThreadSleepInterval());\r
connectionPage.setSyncInterval(Global.getSyncInterval());\r
\r
\r
public class ConfigIndexPage extends QWidget {\r
\r
- private final QSpinBox indexThreadSpinner;\r
- private final QSpinBox lengthSpinner;\r
private final QSpinBox weightSpinner;\r
private final QSpinBox sleepSpinner;\r
private final QCheckBox indexAttachmentsLocally;\r
\r
public ConfigIndexPage(QWidget parent) {\r
// super(parent);\r
- \r
- indexThreadSpinner = new QSpinBox(this);\r
- indexThreadSpinner.setMaximum(5);\r
- indexThreadSpinner.setMinimum(1);\r
- \r
- // Index threads layout\r
- QLabel threadLabel = new QLabel(tr("Maximum Threads"));\r
- QHBoxLayout threadsLayout = new QHBoxLayout();\r
- threadsLayout.addWidget(threadLabel);\r
- threadsLayout.addWidget(indexThreadSpinner);\r
- QGroupBox threadsGroup = new QGroupBox(tr("Indexing Threads (Requires Restart)"));\r
- threadsGroup.setLayout(threadsLayout);\r
- \r
- threadsGroup.setVisible(false);\r
- \r
- \r
- // Minimum word length\r
- QGroupBox wordLengthGroup = new QGroupBox(tr("Word Length"));\r
- QLabel wordLengthLabel = new QLabel(tr("Minimum Word Length"));\r
- lengthSpinner = new QSpinBox();\r
- lengthSpinner.setRange(1,10);\r
- lengthSpinner.setSingleStep(1);\r
- lengthSpinner.setValue(Global.minimumWordCount);\r
- \r
- QHBoxLayout wordLengthLayout = new QHBoxLayout();\r
- wordLengthLayout.addWidget(wordLengthLabel);\r
- wordLengthLayout.addWidget(lengthSpinner);\r
- wordLengthGroup.setLayout(wordLengthLayout);\r
- \r
+ \r
// Recognition weight\r
QGroupBox weightGroup = new QGroupBox(tr("Recognition"));\r
QLabel weightLabel = new QLabel(tr("Minimum Recognition Weight"));\r
\r
\r
QVBoxLayout mainLayout = new QVBoxLayout();\r
- mainLayout.addWidget(threadsGroup);\r
- mainLayout.addWidget(wordLengthGroup);\r
mainLayout.addWidget(sleepGroup);\r
mainLayout.addWidget(weightGroup);\r
mainLayout.addWidget(attachmentGroup);\r
\r
}\r
\r
- //*****************************************\r
- //* Word length get/set methods \r
- //*****************************************\r
- public void setWordLength(int len) {\r
- lengthSpinner.setValue(len);\r
- }\r
- public int getWordLength() {\r
- return lengthSpinner.value();\r
- }\r
\r
//*****************************************\r
//* Get for flag to index attachments \r
return weightSpinner.value();\r
}\r
\r
- //*****************************************\r
- //* Index Threads get/set methods\r
- //*****************************************\r
- public void setIndexThreads(int value) {\r
- indexThreadSpinner.setValue(value);\r
- }\r
- public int getIndexThreads() {\r
- return indexThreadSpinner.value();\r
- }\r
-\r
\r
\r
//*****************************************\r
private List<Note> matches;\r
public List<String> hilightWords;\r
\r
- public EnSearch(DatabaseConnection conn, ApplicationLogger logger, String s, List<Tag> t, int len, int weight) {\r
+ public EnSearch(DatabaseConnection conn, ApplicationLogger logger, String s, List<Tag> t, int weight) {\r
if (s == null) \r
return;\r
if (s.trim().equals(""))\r
return;\r
\r
matches = null;\r
- REnSearch request = new REnSearch(conn, logger, s, t, len, weight);\r
+ REnSearch request = new REnSearch(conn, logger, s, t, weight);\r
matches = request.matchWords();\r
hilightWords = request.getWords();\r
}\r
query.next();\r
return query.valueString(0);\r
}\r
+ // Get a note's content in blob format for index. Looks the note up by guid and returns column 0 (content) of the first result row as a String. NOTE(review): the name implies a UTF conversion is skipped, but nothing in this body shows one being skipped - confirm against the other content getter.\r
+ public String getNoteContentNoUTFConversion(String guid) {\r
+ NSqlQuery query = new NSqlQuery(db.getConnection());\r
+ query.prepare("Select content from note where guid=:guid");\r
+ query.bindValue(":guid", guid); /* guid is bound as the named parameter :guid rather than concatenated into the SQL text */\r
+ query.exec(); /* NOTE(review): the result of exec() is not inspected here */ \r
+ query.next(); /* NOTE(review): the result of next() is not inspected; what valueString(0) yields for an unknown guid is not visible here - confirm */\r
+ return query.valueString(0);\r
+ }\r
// Get a note by Guid\r
public Note getNote(String noteGuid, boolean loadContent, boolean loadResources, boolean loadRecognition, boolean loadBinary, boolean loadTags) {\r
if (noteGuid == null)\r
private final ApplicationLogger logger;\r
// private final DatabaseConnection db;\r
private boolean any;\r
- private int minimumWordLength = 3;\r
private int minimumRecognitionWeight = 80;\r
private final DatabaseConnection conn;\r
\r
- public REnSearch(DatabaseConnection c, ApplicationLogger l, String s, List<Tag> t, int m, int r) {\r
+ public REnSearch(DatabaseConnection c, ApplicationLogger l, String s, List<Tag> t, int r) {\r
logger = l;\r
conn = c;\r
tagIndex = t;\r
- minimumWordLength = m;\r
minimumRecognitionWeight = r;\r
searchWords = new ArrayList<String>();\r
searchPhrases = new ArrayList<String>();\r
// subject date\r
\r
private void parseTerms(List<String> words) {\r
- int minLen = minimumWordLength;\r
\r
for (int i=0; i<words.size(); i++) {\r
String word = words.get(i);\r
searchPhrase=true;\r
searchPhrases.add(word.toLowerCase());\r
}\r
- if (!searchPhrase && pos < 0 && (word.length() >= minLen || word.indexOf('*')>=0)) \r
- getWords().add(word);\r
+ if (!searchPhrase && pos < 0) \r
+ getWords().add("*"+word+"*");\r
if (word.startsWith("intitle:")) \r
intitle.add("*"+word+"*");\r
if (word.startsWith("-intitle:")) \r
\r
package cx.fbn.nevernote.threads;\r
\r
-import java.io.ByteArrayInputStream;\r
-import java.io.ByteArrayOutputStream;\r
import java.io.File;\r
import java.io.FileInputStream;\r
import java.io.FileNotFoundException;\r
import org.apache.tika.parser.pdf.PDFParser;\r
import org.apache.tika.parser.rtf.RTFParser;\r
import org.apache.tika.sax.BodyContentHandler;\r
-import org.w3c.tidy.Tidy;\r
import org.xml.sax.ContentHandler;\r
import org.xml.sax.SAXException;\r
\r
e.printStackTrace();\r
}\r
}\r
+ logger.log(logger.EXTREME, "Shutting down database");\r
conn.dbShutdown();\r
+ logger.log(logger.EXTREME, "Database shut down. Exiting thread");\r
}\r
\r
// Reindex a note\r
public void indexNoteContent() {\r
\r
-// if (wordMap.size() > 0)\r
-// wordMap.clear();\r
logger.log(logger.EXTREME, "Entering indexRunner.indexNoteContent()");\r
\r
logger.log(logger.EXTREME, "Getting note content");\r
Note n = conn.getNoteTable().getNote(guid,true,false,true,true, true);\r
String data = n.getContent();\r
+ data = conn.getNoteTable().getNoteContentNoUTFConversion(n.getGuid());\r
+ System.out.println(data);\r
\r
logger.log(logger.EXTREME, "Removing any encrypted data");\r
- data = removeEnCrypt(data);\r
+ data = removeEnCrypt(data.toString());\r
logger.log(logger.EXTREME, "Removing xml markups");\r
- Tidy tidy = new Tidy();\r
- tidy.getStderr().close(); // the listener will capture messages\r
- tidy.setXmlTags(true);\r
- byte html[] = data.getBytes();\r
- ByteArrayInputStream is = new ByteArrayInputStream(html);\r
- ByteArrayOutputStream os = new ByteArrayOutputStream();\r
- tidy.parse(is, os);\r
- String text = StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", "")) +" "+\r
- n.getTitle();\r
+ String text = removeTags(StringEscapeUtils.unescapeHtml(data) +" "+\r
+ n.getTitle());\r
\r
logger.log(logger.EXTREME, "Splitting words");\r
String[] result = text.toString().split(regex);\r
\r
logger.log(logger.EXTREME, "Number of words found: " +result.length);\r
for (int j=0; j<result.length && keepRunning; j++) {\r
- logger.log(logger.EXTREME, "Result word: " +result[j]);\r
- addToIndex(guid, result[j], "CONTENT");\r
+ if (!result[j].trim().equals("")) {\r
+ logger.log(logger.EXTREME, "Result word: " +result[j]);\r
+ addToIndex(guid, result[j], "CONTENT");\r
+ }\r
}\r
// If we were interrupted, we will reindex this note next time\r
if (Global.keepRunning) {\r
}\r
logger.log(logger.EXTREME, "Leaving indexRunner.indexNoteContent()");\r
}\r
+ \r
+ \r
+ // Strip markup from text and return what is left.\r
+ // The text is scanned right to left: a '>' opens a tag, the nearest '<' to its left\r
+ // closes it, and everything in between (both brackets included) is dropped.\r
+ // Edge cases:\r
+ //   - a '>' with no '<' before it drops everything from the start of the text up to it\r
+ //   - a '<' with no '>' after it drops only the '<' itself\r
+ //   - nothing is inserted where a tag was, so "a</p><p>b" becomes "ab"\r
+ // Kept characters are written once into a scratch array filled from the back, so the\r
+ // cost is linear in the text length (deleting in place shifts the tail per removal).\r
+ // Throws NullPointerException if text is null.\r
+ private String removeTags(String text) {\r
+ char[] kept = new char[text.length()];\r
+ int writePos = kept.length;\r
+ boolean inTag = false;\r
+ for (int i=text.length()-1; i>=0; i--) {\r
+ char c = text.charAt(i);\r
+ if (c == '>')\r
+ inTag = true;\r
+ else if (c == '<')\r
+ inTag = false;\r
+ // The closing '<' resets inTag but is itself still part of the tag.\r
+ if (!inTag && c != '<')\r
+ kept[--writePos] = c;\r
+ }\r
+ \r
+ return new String(kept, writePos, kept.length - writePos);\r
+ }\r
\r
\r
public synchronized boolean addWork(String request) {\r
if (word.length() > 0) {\r
// We have a good word, now let's trim off junk at the beginning or end\r
StringBuffer buffer = new StringBuffer(word.toLowerCase());\r
- for (int x = buffer.length()-1; x>=0; x--) {\r
- if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
- buffer = buffer.deleteCharAt(x);\r
- else\r
- x=-1;\r
- }\r
- // Things have been trimmed off the end, so reverse the string & repeat.\r
- buffer = buffer.reverse();\r
- for (int x = buffer.length()-1; x>=0 && keepRunning; x--) {\r
- if (!Character.isLetterOrDigit(buffer.charAt(x)))\r
- buffer = buffer.deleteCharAt(x);\r
- else\r
- x=-1;\r
- }\r
- // Restore the string back to the proper order.\r
- buffer = buffer.reverse();\r
- \r
- if (buffer.length()>=Global.minimumWordCount) {\r
- conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
- }\r
+ conn.getWordsTable().addWordToNoteIndex(guid, buffer.toString(), type, 100);\r
}\r
return;\r
}\r
e.printStackTrace();\r
}\r
\r
-\r
}\r
\r
//***************************************************************\r
// load saved search index\r
setSavedSearchIndex(conn.getSavedSearchTable().getAll());\r
// Load search helper utility\r
- enSearch = new EnSearch(conn, logger, "", getTagIndex(), Global.getMinimumWordLength(), Global.getRecognitionWeight());\r
+ enSearch = new EnSearch(conn, logger, "", getTagIndex(), Global.getRecognitionWeight());\r
logger.log(logger.HIGH, "Building note index");\r
\r
// if (getMasterNoteIndex() == null) { \r
//***************************************************************\r
//***************************************************************\r
public void setEnSearch(String t) {\r
- enSearch = new EnSearch(conn,logger, t, getTagIndex(), Global.getMinimumWordLength(), Global.getRecognitionWeight());\r
+ enSearch = new EnSearch(conn,logger, t, getTagIndex(), Global.getRecognitionWeight());\r
enSearchChanged = true;\r
}\r
// Save search tags\r