ByteArrayInputStream is = new ByteArrayInputStream(html);\r
ByteArrayOutputStream os = new ByteArrayOutputStream();\r
tidy.parse(is, os);\r
- String text = StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", ""));\r
+ String text = StringEscapeUtils.unescapeHtml(os.toString().replaceAll("\\<.*?\\>", "")) +" "+\r
+ n.getTitle();\r
\r
logger.log(logger.EXTREME, "Splitting words");\r
String[] result = text.toString().split(regex);\r