2 * This file is part of NixNote/NeighborNote
\r
3 * Copyright 2009 Randy Baumgarte
\r
5 * This file may be licensed under the terms of the
\r
6 * GNU General Public License Version 2 (the ``GPL'').
\r
8 * Software distributed under the License is distributed
\r
9 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
\r
10 * express or implied. See the GPL for the specific language
\r
11 * governing rights and limitations.
\r
13 * You should have received a copy of the GPL along with this
\r
14 * program. If not, go to http://www.gnu.org/licenses/gpl.html
\r
15 * or write to the Free Software Foundation, Inc.,
\r
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
\r
21 package cx.fbn.nevernote.xml;
\r
23 import java.util.ArrayList;
\r
24 import java.util.List;
\r
26 import com.trolltech.qt.xml.QDomDocument;
\r
27 import com.trolltech.qt.xml.QDomElement;
\r
28 import com.trolltech.qt.xml.QDomNode;
\r
29 import com.trolltech.qt.xml.QDomNodeList;
\r
30 import com.trolltech.qt.xml.QDomText;
\r
32 import cx.fbn.nevernote.Global;
\r
34 public class XMLCleanup {
\r
// Note text being cleaned; validate() reads it and overwrites it with the serialized DOM.
35 private String content;
\r
// DOM document that validate() creates and that the fix-up methods edit in place.
36 private QDomDocument doc;
\r
// "guid" attribute values recorded by fixNode() and fixLinkNode() as they convert elements.
37 private final List<String> resources;
\r
// Creates a cleaner whose resource list starts out empty.
39 public XMLCleanup() {
\r
40 resources = new ArrayList<String>();
\r
44 public void setValue(String text) {
\r
47 public String getValue() {
\r
50 // Validate the contents of the note. Change unsupported things
// Parses the note text into a DOM, applies the fix-ups below, and stores the
// serialized document back into content.
// NOTE(review): this copy of the method is missing some original lines (the embedded
// line numbers skip), so statements that are not shown here are not described.
\r
51 public void validate() {
\r
52 doc = new QDomDocument();
\r
// Cut the text at the last closing </en-note> tag.
// NOTE(review): lastIndexOf returns -1 when the tag is absent, and substring(0, -1)
// throws StringIndexOutOfBoundsException -- confirm callers always pass a closed note.
53 int br = content.lastIndexOf("</en-note>");
\r
54 content = new String(content.substring(0,br));
\r
// Keep everything from the opening <en-note tag onward.
// NOTE(review): the declaration of newContent and any guard on k are not visible here.
56 int k = content.indexOf("<en-note");
\r
59 newContent = new String(content.substring(k));
\r
62 // Fix the background color
\r
// Prefix the XML declaration and the ENML DOCTYPE; the rest of this expression is
// not visible in this copy.
65 newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
\r
66 +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"
\r
// Parse the text; on failure, dump the text and the error position to stdout.
70 QDomDocument.Result result = doc.setContent(newContent);
\r
71 if (!result.success) {
\r
// NOTE(review): the message names XMLValidator although this class is XMLCleanup.
72 System.out.println("DOM error in XMLValidator.validate()");
\r
73 System.out.println(newContent);
\r
74 System.out.println("Location : Line-"+result.errorLine +" Column-" + result.errorColumn);
\r
// NOTE(review): what happens after this message is not visible in this copy.
75 System.out.println("Exiting");
\r
// Copy a CSS background-color found in an en-note style attribute into a bgcolor
// attribute. The list is walked from the last element to the first.
80 QDomNodeList noteAnchors = doc.elementsByTagName("en-note");
\r
81 int noteCount = noteAnchors.length();
\r
82 for (int i=noteCount-1; i>=0; i--) {
\r
83 if (noteAnchors.at(i).toElement().hasAttribute("style")) {
\r
84 String style = noteAnchors.at(i).toElement().attribute("style");
\r
85 int startColor = style.indexOf("background-color:");
\r
86 if (startColor > -1) {
\r
// 17 is the length of "background-color:", so color starts right after the colon.
87 String color = style.substring(startColor+17);
\r
// Keep the text up to the next ';'.
// NOTE(review): if no ';' follows, indexOf returns -1 and substring(0, -1) throws.
88 color = color.substring(0,color.indexOf(";"));
\r
89 noteAnchors.at(i).toElement().setAttribute("bgcolor", color);
\r
94 // Scan through tags node by node
// NOTE(review): the statement that performs this scan is not visible in this copy
// (presumably a call to scanTags()) -- confirm against the original file.
\r
97 // Scan again making sure we didn't miss any <a> tags. Sometimes we do
\r
// Every <a> element is passed through fixLinkNode(), last to first.
98 QDomNodeList anchorList = doc.elementsByTagName("a");
\r
99 int anchorCount = anchorList.length();
\r
100 for (int i=anchorCount-1; i>=0; i--) {
\r
101 QDomNode link = anchorList.at(i);
\r
102 link = fixLinkNode(link);
\r
105 // Remove invalid elements & attributes
\r
106 // Modify en-media tags
// For each tag name that is a key of Global.invalidAttributes, find the matching
// elements and remove every attribute listed for that element's lower-cased name.
\r
107 QDomNodeList anchors;
\r
108 for (String key : Global.invalidAttributes.keySet()) {
\r
109 anchors = doc.elementsByTagName(key);
\r
110 int enMediaCount = anchors.length();
\r
111 for (int i=enMediaCount-1; i>=0; i--) {
\r
112 QDomElement element = anchors.at(i).toElement();
\r
113 ArrayList<String> names = Global.invalidAttributes.get(element.nodeName().toLowerCase());
\r
// No list for this element name means nothing is removed.
114 if (names != null) {
\r
115 for (int j=0; j<names.size(); j++) {
\r
116 element.removeAttribute(names.get(j));
\r
// Every element whose tag name appears in Global.invalidElements is renamed to
// <span>; the element itself is kept.
122 List<String> elements = Global.invalidElements;
\r
123 for (int j=0; j<elements.size(); j++) {
\r
124 anchors = doc.elementsByTagName(elements.get(j));
\r
125 int enMediaCount = anchors.length();
\r
126 for (int i=enMediaCount-1; i>=0; i--) {
\r
127 QDomElement element = anchors.at(i).toElement();
\r
128 element.setTagName("span");
\r
131 // Replace the XML carriage returns that the toString() creates.
// NOTE(review): in this copy the string literal below spans a line break, which is
// not valid Java; the original text of the literal may have been altered when this
// copy was produced -- confirm against the original file.
\r
132 content = doc.toString().replace( "
", "" );
\r
135 // Start looking through the tree.
// Hands the document's top-level child nodes to parseNodes() when there are any.
\r
136 private void scanTags() {
\r
138 if (doc.hasChildNodes())
\r
139 parseNodes(doc.childNodes());
\r
// Walks a node list in index order, recursing into each node's children.
// NOTE(review): the statements that follow the recursive call are not visible in
// this copy, so any per-node processing is not described here.
143 private void parseNodes(QDomNodeList nodes) {
\r
144 for (int i=0; i<nodes.size(); i++) {
\r
145 QDomNode node = nodes.at(i);
\r
// Children are visited before the rest of this loop body runs.
146 if (node.hasChildNodes())
\r
147 parseNodes(node.childNodes());
\r
153 // Fix the contents of the node back to ENML.
// Dispatches on the node name (case-insensitively) and rewrites or removes the node.
// NOTE(review): closing braces and some statements are missing from this copy (the
// embedded line numbers skip), so nesting and early exits are not described here.
\r
154 private void fixNode(QDomNode node) {
\r
// Normalize a "checked" attribute: any value other than "true" becomes "false".
155 QDomElement scanChecked = node.toElement();
\r
156 if (scanChecked.hasAttribute("checked")) {
\r
// NOTE(review): prints the attribute value to stdout; looks like leftover debug output.
157 System.out.println(scanChecked.attribute("checked"));
\r
158 if (!scanChecked.attribute("checked").equalsIgnoreCase("true"))
\r
159 scanChecked.setAttribute("checked", "false");
\r
// Comment nodes and <script> elements are removed from their parent.
161 if (node.nodeName().equalsIgnoreCase("#comment") || node.nodeName().equalsIgnoreCase("script")) {
\r
162 node.parentNode().removeChild(node);
\r
// <input> becomes <en-todo>: its "value" attribute is moved into "checked" and the
// listed attributes are removed.
164 if (node.nodeName().equalsIgnoreCase("input")) {
\r
165 QDomElement e = node.toElement();
\r
166 e.setTagName("en-todo");
\r
167 String value = e.attribute("value");
\r
// NOTE(review): the statement this if guards (original line 169) is not visible in
// this copy -- confirm against the original file.
168 if (value.trim().equals(""))
\r
170 e.removeAttribute("value");
\r
171 e.removeAttribute("unchecked");
\r
172 e.setAttribute("checked", value);
\r
173 e.removeAttribute("onclick");
\r
174 e.removeAttribute("onmouseover");
\r
175 e.removeAttribute("type");
\r
// <a> elements are handled by fixLinkNode().
178 if (node.nodeName().equalsIgnoreCase("a")) {
\r
179 node = fixLinkNode(node);
\r
181 // Restore image resources
\r
// The "en-tag" attribute of an <img> selects which conversion is applied.
182 if (node.nodeName().equalsIgnoreCase("img")) {
\r
183 QDomElement e = node.toElement();
\r
184 String enType = e.attribute("en-tag");
\r
186 // Check if we have an en-crypt tag. Change it from an img to en-crypt
\r
187 if (enType.equalsIgnoreCase("en-crypt")) {
\r
// The "alt" attribute text becomes the element's text child.
189 String encrypted = e.attribute("alt");
\r
191 QDomText crypt = doc.createTextNode(encrypted);
\r
192 e.appendChild(crypt);
\r
194 e.removeAttribute("v:shapes");
\r
195 e.removeAttribute("en-tag");
\r
196 e.removeAttribute("contenteditable");
\r
197 e.removeAttribute("alt");
\r
198 e.removeAttribute("src");
\r
199 e.removeAttribute("id");
\r
200 e.removeAttribute("onclick");
\r
201 e.removeAttribute("onmouseover");
\r
202 e.setTagName("en-crypt");
\r
// NOTE(review): e was obtained from node.toElement(), so this asks the node to
// remove itself as its own child -- confirm the intent.
203 node.removeChild(e);
\r
207 // Check if we have a LaTeX image. Remove the parent link tag
\r
208 if (enType.equalsIgnoreCase("en-latex")) {
\r
// From here on the element is treated as en-media.
209 enType = "en-media";
\r
// Detach the image from its parent, then put it in the parent's place.
210 QDomNode parent = e.parentNode();
\r
211 parent.removeChild(e);
\r
212 parent.parentNode().replaceChild(e, parent);
\r
215 // If we've gotten this far, we have an en-media tag
\r
// Rename the element to enType and record its guid before the attribute is removed.
216 e.setTagName(enType);
\r
217 resources.add(e.attribute("guid"));
\r
218 e.removeAttribute("guid");
\r
219 e.removeAttribute("src");
\r
220 e.removeAttribute("en-new");
\r
221 e.removeAttribute("en-tag");
\r
224 // Tags like <ul><ul><li>1</li></ul></ul> are technically valid, but Evernote
\r
225 // expects that a <ul> tag only has a <li>, so we will need to change them
\r
226 // to this: <ul><li><ul><li>1</li></ul></li></ul>
\r
// Only the first child of the <ul> is examined.
227 if (node.nodeName().equalsIgnoreCase("ul")) {
\r
228 QDomNode firstChild = node.firstChild();
\r
229 QDomElement childElement = firstChild.toElement();
\r
230 if (childElement.nodeName().equalsIgnoreCase("ul")) {
\r
// Insert a new <li> where the nested <ul> was and move the nested <ul> into it.
231 QDomElement newElement = doc.createElement("li");
\r
232 node.insertBefore(newElement, firstChild);
\r
233 node.removeChild(firstChild);
\r
234 newElement.appendChild(firstChild);
\r
// An <en-hilight> element is replaced by a plain text node holding its text.
238 if (node.nodeName().equalsIgnoreCase("en-hilight")) {
\r
239 QDomElement e = node.toElement();
\r
240 QDomText newText = doc.createTextNode(e.text());
\r
241 e.parentNode().replaceChild(newText,e);
\r
// A <span> with class "en-hilight" or "en-spell" is replaced by its text; one with
// pdfnavigationtable="true" is removed from its parent.
243 if (node.nodeName().equalsIgnoreCase("span")) {
\r
244 QDomElement e = node.toElement();
\r
245 if (e.attribute("class").equalsIgnoreCase("en-hilight") || e.attribute("class").equalsIgnoreCase("en-spell")) {
\r
246 QDomText newText = doc.createTextNode(e.text());
\r
247 e.parentNode().replaceChild(newText,e);
\r
249 if (e.attribute("pdfnavigationtable").equalsIgnoreCase("true")) {
\r
250 node.parentNode().removeChild(node);
\r
254 // Fix up encryption tag
\r
// <en-crypt-temp> becomes <en-crypt>; its "value" attribute becomes a text child.
255 if (node.nodeName().equalsIgnoreCase("en-crypt-temp")) {
\r
256 QDomElement e = node.toElement();
\r
257 e.setTagName("en-crypt");
\r
258 String crypt = e.attribute("value");
\r
259 e.removeAttribute("value");
\r
260 QDomText cryptValue = doc.createTextNode(crypt);
\r
261 e.appendChild(cryptValue);
\r
// Converts a link element whose "en-tag" attribute is "en-media" (case-insensitive)
// into an <en-media> element; other elements are not modified by the code shown.
// NOTE(review): the return statement is not visible in this copy.
266 private QDomNode fixLinkNode(QDomNode node) {
\r
267 QDomElement e = node.toElement();
\r
268 String enTag = e.attribute("en-tag");
\r
269 if (enTag.equalsIgnoreCase("en-media")) {
\r
270 e.setTagName("en-media");
\r
271 e.removeAttribute("en-type");
\r
272 e.removeAttribute("en-tag");
\r
273 e.removeAttribute("en-new");
\r
// Record the guid before the attribute is removed below.
274 resources.add(e.attribute("guid"));
\r
275 e.removeAttribute("href");
\r
276 e.removeAttribute("guid");
\r
277 e.setNodeValue("");
\r
// Drop the first child element of the converted link.
278 e.removeChild(e.firstChildElement());
\r
284 // Return old resources we've found
\r
285 public List<String> getResources() {
\r