2 * This file is part of NeverNote
\r
3 * Copyright 2009 Randy Baumgarte
\r
5 * This file may be licensed under the terms of the
\r
6 * GNU General Public License Version 2 (the ``GPL'').
\r
8 * Software distributed under the License is distributed
\r
9 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
\r
10 * express or implied. See the GPL for the specific language
\r
11 * governing rights and limitations.
\r
13 * You should have received a copy of the GPL along with this
\r
14 * program. If not, go to http://www.gnu.org/licenses/gpl.html
\r
15 * or write to the Free Software Foundation, Inc.,
\r
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
\r
21 package cx.fbn.nevernote.xml;
\r
23 import java.util.ArrayList;
\r
24 import java.util.List;
\r
26 import com.trolltech.qt.xml.QDomDocument;
\r
27 import com.trolltech.qt.xml.QDomElement;
\r
28 import com.trolltech.qt.xml.QDomNode;
\r
29 import com.trolltech.qt.xml.QDomNodeList;
\r
30 import com.trolltech.qt.xml.QDomText;
\r
32 import cx.fbn.nevernote.Global;
\r
33 import cx.fbn.nevernote.evernote.EnCrypt;
\r
35 public class XMLCleanup {
\r
36 private String content;
\r
37 private QDomDocument doc;
\r
38 private final List<String> resources;
\r
/**
 * Creates a cleanup helper whose resource list starts out empty.
 */
public XMLCleanup() {
    this.resources = new ArrayList<String>();
}
\r
45 public void setValue(String text) {
\r
47 /* content = content.replace("<HR>", "<hr/>");
\r
48 content = content.replace("<hr>", "<hr/>");
\r
49 content = content.replace("</HR>", "");
\r
50 content = content.replace("</hr>", ""); */
\r
52 public String getValue() {
\r
55 // Validate the contents of the note. Change unsupported things
\r
56 public void validate() {
\r
57 doc = new QDomDocument();
\r
58 int br = content.lastIndexOf("</en-note>");
\r
59 content = new String(content.substring(0,br));
\r
61 int k = content.indexOf("<en-note");
\r
64 newContent = new String(content.substring(k));
\r
67 // Fix the background color
\r
70 newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
\r
71 +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"
\r
75 QDomDocument.Result result = doc.setContent(newContent);
\r
76 if (!result.success) {
\r
77 System.out.println("DOM error in XMLValidator.validate()");
\r
78 System.out.println(newContent);
\r
79 System.out.println("Location : Line-"+result.errorLine +" Column-" + result.errorColumn);
\r
80 System.out.println("Exiting");
\r
85 QDomNodeList noteAnchors = doc.elementsByTagName("en-note");
\r
86 int noteCount = noteAnchors.length();
\r
87 for (int i=noteCount-1; i>=0; i--) {
\r
88 if (noteAnchors.at(i).toElement().hasAttribute("style")) {
\r
89 String style = noteAnchors.at(i).toElement().attribute("style");
\r
90 int startColor = style.indexOf("background-color:");
\r
91 if (startColor > -1) {
\r
92 String color = style.substring(startColor+17);
\r
93 color = color.substring(0,color.indexOf(";"));
\r
94 noteAnchors.at(i).toElement().setAttribute("bgcolor", color);
\r
101 // Remove invalid elements & attributes
\r
102 // Modify en-media tags
\r
103 QDomNodeList anchors;
\r
104 for (String key : Global.invalidAttributes.keySet()) {
\r
105 anchors = doc.elementsByTagName(key);
\r
106 int enMediaCount = anchors.length();
\r
107 for (int i=enMediaCount-1; i>=0; i--) {
\r
108 QDomElement element = anchors.at(i).toElement();
\r
109 ArrayList<String> names = Global.invalidAttributes.get(element.nodeName().toLowerCase());
\r
110 if (names != null) {
\r
111 for (int j=0; j<names.size(); j++) {
\r
112 element.removeAttribute(names.get(j));
\r
118 List<String> elements = Global.invalidElements;
\r
119 for (int j=0; j<elements.size(); j++) {
\r
120 anchors = doc.elementsByTagName(elements.get(j));
\r
121 int enMediaCount = anchors.length();
\r
122 for (int i=enMediaCount-1; i>=0; i--) {
\r
123 QDomElement element = anchors.at(i).toElement();
\r
124 element.setTagName("span");
\r
127 content = doc.toString();
\r
130 // Start looking through the tree.
\r
131 private void scanTags() {
\r
132 // System.out.println("scanTags start");
\r
133 // QDomElement element = doc.firstChildElement();
\r
134 // parseChildren(element.firstChild());
\r
136 if (doc.hasChildNodes())
\r
137 parseNodes(doc.childNodes());
\r
141 private void parseNodes(QDomNodeList nodes) {
\r
142 for (int i=0; i<nodes.size(); i++) {
\r
143 QDomNode node = nodes.at(i);
\r
144 if (node.hasChildNodes())
\r
145 parseNodes(node.childNodes());
\r
151 // Parse through individual nodes
\r
152 private void parseChildren(QDomNode node) {
\r
153 System.out.println("Starting parseChildren " +node.toElement().nodeName() +" : " +node.toElement().text());
\r
154 for(; !node.isNull(); node = node.nextSibling()) {
\r
155 if (node.hasChildNodes()) {
\r
156 QDomNodeList l = node.childNodes();
\r
158 for (int i=0; i<l.size(); i++) {
\r
159 System.out.println("Child node size: " +l.size() +" " +i);
\r
160 parseChildren(l.at(i));
\r
169 // Fix the contents of the node back to ENML.
\r
170 private void fixNode(QDomNode node) {
\r
171 QDomElement scanChecked = node.toElement();
\r
172 if (scanChecked.hasAttribute("checked")) {
\r
173 System.out.println(scanChecked.attribute("checked"));
\r
174 if (!scanChecked.attribute("checked").equalsIgnoreCase("true"))
\r
175 scanChecked.setAttribute("checked", "false");
\r
177 if (node.nodeName().equalsIgnoreCase("#comment") || node.nodeName().equalsIgnoreCase("script")) {
\r
178 node.parentNode().removeChild(node);
\r
180 if (node.nodeName().equalsIgnoreCase("input")) {
\r
181 QDomElement e = node.toElement();
\r
182 e.setTagName("en-todo");
\r
183 String value = e.attribute("value");
\r
184 e.removeAttribute("value");
\r
185 e.removeAttribute("unchecked");
\r
186 e.setAttribute("checked", value);
\r
187 e.removeAttribute("onclick");
\r
188 e.removeAttribute("type");
\r
192 if (node.nodeName().equalsIgnoreCase("a")) {
\r
193 QDomElement e = node.toElement();
\r
194 String enTag = e.attribute("en-tag");
\r
195 if (enTag.equalsIgnoreCase("en-media")) {
\r
196 e.setTagName("en-media");
\r
197 e.removeAttribute("en-type");
\r
198 e.removeAttribute("en-tag");
\r
199 e.removeAttribute("en-new");
\r
200 resources.add(e.attribute("guid"));
\r
201 e.removeAttribute("href");
\r
202 e.removeAttribute("guid");
\r
203 e.setNodeValue("");
\r
204 e.removeChild(e.firstChildElement());
\r
207 // Restore image resources
\r
208 if (node.nodeName().equalsIgnoreCase("img")) {
\r
209 QDomElement e = node.toElement();
\r
210 String enType = e.attribute("en-tag");
\r
212 // Check if we have an en-crypt tag. Change it from an img to en-crypt
\r
213 if (enType.equalsIgnoreCase("en-crypt")) {
\r
215 String encrypted = e.attribute("alt");
\r
217 QDomText crypt = doc.createTextNode(encrypted);
\r
218 e.appendChild(crypt);
\r
220 e.removeAttribute("v:shapes");
\r
221 e.removeAttribute("en-tag");
\r
222 e.removeAttribute("contenteditable");
\r
223 e.removeAttribute("alt");
\r
224 e.removeAttribute("src");
\r
225 e.removeAttribute("id");
\r
226 e.removeAttribute("onclick");
\r
227 e.removeAttribute("onmouseover");
\r
228 e.setTagName("en-crypt");
\r
229 node.removeChild(e);
\r
233 // If we've gotten this far, we have an en-media tag
\r
234 e.setTagName(enType);
\r
235 resources.add(e.attribute("guid"));
\r
236 e.removeAttribute("guid");
\r
237 e.removeAttribute("src");
\r
238 e.removeAttribute("en-new");
\r
239 e.removeAttribute("en-tag");
\r
242 // Tags like <ul><ul><li>1</li></ul></ul> are technically valid, but Evernote
\r
243 // expects that a <ul> tag only has a <li>, so we will need to change them
\r
244 // to this: <ul><li><ul><li>1</li></ul></li></ul>
\r
245 if (node.nodeName().equalsIgnoreCase("ul")) {
\r
246 QDomNode firstChild = node.firstChild();
\r
247 QDomElement childElement = firstChild.toElement();
\r
248 if (childElement.nodeName().equalsIgnoreCase("ul")) {
\r
249 QDomElement newElement = doc.createElement("li");
\r
250 node.insertBefore(newElement, firstChild);
\r
251 node.removeChild(firstChild);
\r
252 newElement.appendChild(firstChild);
\r
256 if (node.nodeName().equalsIgnoreCase("en-crypt-temp")) {
\r
257 QDomElement e = node.toElement();
\r
258 String slot = e.attribute("slot");
\r
259 e.removeAttribute("slot");
\r
260 String password = Global.passwordSafe.get(slot);
\r
261 Global.passwordSafe.remove(slot);
\r
262 EnCrypt crypt = new EnCrypt();
\r
263 String encrypted = crypt.encrypt(e.text(), password, 64);
\r
265 QDomText newText = doc.createTextNode(encrypted);
\r
266 e.appendChild(newText);
\r
267 e.removeChild(e.firstChild());
\r
268 e.setTagName("en-crypt");
\r
270 if (node.nodeName().equalsIgnoreCase("en-hilight")) {
\r
271 QDomElement e = node.toElement();
\r
272 QDomText newText = doc.createTextNode(e.text());
\r
273 e.parentNode().replaceChild(newText,e);
\r
275 if (node.nodeName().equalsIgnoreCase("span")) {
\r
276 QDomElement e = node.toElement();
\r
277 if (e.attribute("class").equalsIgnoreCase("en-hilight") || e.attribute("class").equalsIgnoreCase("en-spell")) {
\r
278 QDomText newText = doc.createTextNode(e.text());
\r
279 e.parentNode().replaceChild(newText,e);
\r
281 if (e.attribute("pdfnavigationtable").equalsIgnoreCase("true")) {
\r
282 node.parentNode().removeChild(node);
\r
289 // Return old resources we've found
\r
290 public List<String> getResources() {
\r