2 * This file is part of NixNote
\r
3 * Copyright 2009 Randy Baumgarte
\r
5 * This file may be licensed under the terms of the
\r
6 * GNU General Public License Version 2 (the ``GPL'').
\r
8 * Software distributed under the License is distributed
\r
9 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
\r
10 * express or implied. See the GPL for the specific language
\r
11 * governing rights and limitations.
\r
13 * You should have received a copy of the GPL along with this
\r
14 * program. If not, go to http://www.gnu.org/licenses/gpl.html
\r
15 * or write to the Free Software Foundation, Inc.,
\r
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
\r
21 package cx.fbn.nevernote.xml;
\r
23 import java.util.ArrayList;
\r
24 import java.util.List;
\r
26 import com.trolltech.qt.xml.QDomDocument;
\r
27 import com.trolltech.qt.xml.QDomElement;
\r
28 import com.trolltech.qt.xml.QDomNode;
\r
29 import com.trolltech.qt.xml.QDomNodeList;
\r
30 import com.trolltech.qt.xml.QDomText;
\r
32 import cx.fbn.nevernote.Global;
\r
34 public class XMLCleanup {
\r
35 private String content;
\r
36 private QDomDocument doc;
\r
37 private final List<String> resources;
\r
// Create a cleaner whose list of collected resource guids starts out empty.
39 public XMLCleanup() {
\r
40 resources = new ArrayList<String>();
\r
44 public void setValue(String text) {
\r
47 public String getValue() {
\r
50 // Validate the contents of the note. Change unsupported things
// Parses the text held in `content` into `doc`, rewrites constructs that are
// not wanted in the stored note (background colour, links, invalid attributes
// and elements) and stores the re-serialized document back into `content`.
// NOTE(review): this listing is missing several source lines (for example the
// declaration of newContent and every closing brace), so the control flow
// between the visible statements is only partly shown.
\r
51 public void validate() {
\r
52 doc = new QDomDocument();
\r
// Cut the text at the last closing </en-note> tag.
// NOTE(review): lastIndexOf returns -1 when the tag is absent, in which case
// substring(0, -1) throws StringIndexOutOfBoundsException.
53 int br = content.lastIndexOf("</en-note>");
\r
54 content = new String(content.substring(0,br));
\r
// Keep only the text from the first "<en-note" onward. The lines between
// these two statements are not in this listing -- presumably a k > -1 guard
// with an alternative branch; confirm against the full file.
56 int k = content.indexOf("<en-note");
\r
59 newContent = new String(content.substring(k));
\r
62 // Fix the background color
\r
// Prefix the XML declaration and the ENML2 DOCTYPE. The remaining
// continuation lines of this expression are not in this listing.
65 newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
\r
66 +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"
\r
// Parse the text. On failure the text and the error position are printed to
// stdout. NOTE(review): the message names XMLValidator although this class is
// XMLCleanup; what happens after "Exiting" is not visible here.
70 QDomDocument.Result result = doc.setContent(newContent);
\r
71 if (!result.success) {
\r
72 System.out.println("DOM error in XMLValidator.validate()");
\r
73 System.out.println(newContent);
\r
74 System.out.println("Location : Line-"+result.errorLine +" Column-" + result.errorColumn);
\r
75 System.out.println("Exiting");
\r
// For every <en-note> element (walked last to first) that has a style
// attribute containing "background-color:", copy the value up to the next ';'
// into a bgcolor attribute.
80 QDomNodeList noteAnchors = doc.elementsByTagName("en-note");
\r
81 int noteCount = noteAnchors.length();
\r
82 for (int i=noteCount-1; i>=0; i--) {
\r
83 if (noteAnchors.at(i).toElement().hasAttribute("style")) {
\r
84 String style = noteAnchors.at(i).toElement().attribute("style");
\r
85 int startColor = style.indexOf("background-color:");
\r
86 if (startColor > -1) {
\r
// 17 is the length of "background-color:".
87 String color = style.substring(startColor+17);
\r
// NOTE(review): if no ';' follows the colour, indexOf returns -1 and this
// substring call throws StringIndexOutOfBoundsException.
88 color = color.substring(0,color.indexOf(";"));
\r
89 noteAnchors.at(i).toElement().setAttribute("bgcolor", color);
\r
94 // Scan through tags node by node
// (the statement belonging to this comment is not in this listing --
// presumably a call to scanTags(); confirm)
\r
97 // Scan again making sure we didn't miss any <a> tags. Sometimes we do
\r
// Every <a> element is passed through fixLinkNode, from last to first.
98 QDomNodeList anchorList = doc.elementsByTagName("a");
\r
99 int anchorCount = anchorList.length();
\r
100 for (int i=anchorCount-1; i>=0; i--) {
\r
101 QDomNode link = anchorList.at(i);
\r
102 link = fixLinkNode(link);
\r
105 // Remove invalid elements & attributes
\r
106 // Modify en-media tags
\r
// For each tag name that is a key of Global.invalidAttributes, visit the
// elements with that tag name and remove the attributes listed for them.
107 QDomNodeList anchors;
\r
108 for (String key : Global.invalidAttributes.keySet()) {
\r
109 anchors = doc.elementsByTagName(key);
\r
110 int enMediaCount = anchors.length();
\r
111 for (int i=enMediaCount-1; i>=0; i--) {
\r
112 QDomElement element = anchors.at(i).toElement();
\r
// The list is looked up by the element's lower-cased node name, not by `key`;
// a null result means nothing is removed for this element.
113 ArrayList<String> names = Global.invalidAttributes.get(element.nodeName().toLowerCase());
\r
114 if (names != null) {
\r
115 for (int j=0; j<names.size(); j++) {
\r
116 element.removeAttribute(names.get(j));
\r
// Every element whose tag name appears in Global.invalidElements is renamed
// to <span>; its attributes and children are not touched here.
122 List<String> elements = Global.invalidElements;
\r
123 for (int j=0; j<elements.size(); j++) {
\r
124 anchors = doc.elementsByTagName(elements.get(j));
\r
125 int enMediaCount = anchors.length();
\r
126 for (int i=enMediaCount-1; i>=0; i--) {
\r
127 QDomElement element = anchors.at(i).toElement();
\r
128 element.setTagName("span");
\r
131 // Replace the XML carriage returns that the toString() creates.
\r
// Serialize the document back into `content`, deleting every occurrence of
// the search string. NOTE(review): in this listing the search literal spans a
// raw line break; the original source presumably held an escaped character
// or entity there -- confirm against the full file.
132 content = doc.toString().replace( "
", "" );
\r
135 // Start looking through the tree.
// Entry point of the traversal: when the document has child nodes, its
// top-level children are handed to parseNodes.
\r
136 private void scanTags() {
\r
138 if (doc.hasChildNodes())
\r
139 parseNodes(doc.childNodes());
\r
// Walk the given node list in index order, recursing into each node's
// children before anything else is done with the node itself.
// NOTE(review): the statement that follows the recursion is not in this
// listing -- presumably a call to fixNode(node); confirm.
143 private void parseNodes(QDomNodeList nodes) {
\r
144 for (int i=0; i<nodes.size(); i++) {
\r
145 QDomNode node = nodes.at(i);
\r
146 if (node.hasChildNodes())
\r
147 parseNodes(node.childNodes());
\r
153 // Fix the contents of the node back to ENML.
// Applies a series of per-node rewrites keyed on the node name: todo inputs,
// links, images (encrypted, LaTeX, media), nested lists, highlight/spell
// markup and temporary encryption tags.
// NOTE(review): closing braces and some statements (possibly early returns)
// are missing from this listing, so it is not visible which of the sections
// below are mutually exclusive.
\r
154 private void fixNode(QDomNode node) {
\r
// Normalize a "checked" attribute: any value other than "true"
// (case-insensitive) becomes "false".
155 QDomElement scanChecked = node.toElement();
\r
156 if (scanChecked.hasAttribute("checked")) {
\r
// NOTE(review): prints the attribute value to stdout -- looks like leftover debug output.
157 System.out.println(scanChecked.attribute("checked"));
\r
158 if (!scanChecked.attribute("checked").equalsIgnoreCase("true"))
\r
159 scanChecked.setAttribute("checked", "false");
\r
// Comment nodes and <script> elements are removed from their parent.
161 if (node.nodeName().equalsIgnoreCase("#comment") || node.nodeName().equalsIgnoreCase("script")) {
\r
162 node.parentNode().removeChild(node);
\r
// An <input> becomes <en-todo>: its "value" attribute is moved into "checked"
// and the value/unchecked/onclick/type attributes are dropped.
// NOTE(review): this overwrites whatever "checked" was normalized to above.
164 if (node.nodeName().equalsIgnoreCase("input")) {
\r
165 QDomElement e = node.toElement();
\r
166 e.setTagName("en-todo");
\r
167 String value = e.attribute("value");
\r
168 e.removeAttribute("value");
\r
169 e.removeAttribute("unchecked");
\r
170 e.setAttribute("checked", value);
\r
171 e.removeAttribute("onclick");
\r
172 e.removeAttribute("type");
\r
// Links are delegated to fixLinkNode.
175 if (node.nodeName().equalsIgnoreCase("a")) {
\r
176 node = fixLinkNode(node);
\r
178 // Restore image resources
\r
// The "en-tag" attribute of an <img> selects which kind of element it is
// turned back into.
179 if (node.nodeName().equalsIgnoreCase("img")) {
\r
180 QDomElement e = node.toElement();
\r
181 String enType = e.attribute("en-tag");
\r
183 // Check if we have an en-crypt tag. Change it from an img to en-crypt
\r
184 if (enType.equalsIgnoreCase("en-crypt")) {
\r
// The "alt" attribute value becomes the text content of the element.
186 String encrypted = e.attribute("alt");
\r
188 QDomText crypt = doc.createTextNode(encrypted);
\r
189 e.appendChild(crypt);
\r
// Strip the display-only attributes, then rename the element.
191 e.removeAttribute("v:shapes");
\r
192 e.removeAttribute("en-tag");
\r
193 e.removeAttribute("contenteditable");
\r
194 e.removeAttribute("alt");
\r
195 e.removeAttribute("src");
\r
196 e.removeAttribute("id");
\r
197 e.removeAttribute("onclick");
\r
198 e.removeAttribute("onmouseover");
\r
199 e.setTagName("en-crypt");
\r
// NOTE(review): e was obtained from node.toElement(), so this asks the node
// to remove itself from its own children -- presumably it has no effect;
// confirm the intent.
200 node.removeChild(e);
\r
204 // Check if we have a LaTeX image. Remove the parent link tag
\r
// The image is detached from its parent and then put in the parent's place
// under the grandparent; it is subsequently handled as en-media.
205 if (enType.equalsIgnoreCase("en-latex")) {
\r
206 enType = "en-media";
\r
207 QDomNode parent = e.parentNode();
\r
208 parent.removeChild(e);
\r
209 parent.parentNode().replaceChild(e, parent);
\r
212 // If we've gotten this far, we have an en-media tag
\r
// Rename to the en-tag value, remember the guid in `resources`, and drop the
// guid/src/en-new/en-tag attributes.
213 e.setTagName(enType);
\r
214 resources.add(e.attribute("guid"));
\r
215 e.removeAttribute("guid");
\r
216 e.removeAttribute("src");
\r
217 e.removeAttribute("en-new");
\r
218 e.removeAttribute("en-tag");
\r
221 // Tags like <ul><ul><li>1</li></ul></ul> are technically valid, but Evernote
\r
222 // expects that a <ul> tag only has a <li>, so we will need to change them
\r
223 // to this: <ul><li><ul><li>1</li></ul></li></ul>
\r
// Only the first child of the <ul> is examined; when it is itself a <ul>, a
// new <li> is inserted in its position and the nested list is moved into it.
224 if (node.nodeName().equalsIgnoreCase("ul")) {
\r
225 QDomNode firstChild = node.firstChild();
\r
226 QDomElement childElement = firstChild.toElement();
\r
227 if (childElement.nodeName().equalsIgnoreCase("ul")) {
\r
228 QDomElement newElement = doc.createElement("li");
\r
229 node.insertBefore(newElement, firstChild);
\r
230 node.removeChild(firstChild);
\r
231 newElement.appendChild(firstChild);
\r
// An <en-hilight> element is replaced by a plain text node holding its text.
235 if (node.nodeName().equalsIgnoreCase("en-hilight")) {
\r
236 QDomElement e = node.toElement();
\r
237 QDomText newText = doc.createTextNode(e.text());
\r
238 e.parentNode().replaceChild(newText,e);
\r
// A <span> with class en-hilight or en-spell is likewise replaced by its
// text; a <span> with pdfnavigationtable="true" is removed from its parent.
240 if (node.nodeName().equalsIgnoreCase("span")) {
\r
241 QDomElement e = node.toElement();
\r
242 if (e.attribute("class").equalsIgnoreCase("en-hilight") || e.attribute("class").equalsIgnoreCase("en-spell")) {
\r
243 QDomText newText = doc.createTextNode(e.text());
\r
244 e.parentNode().replaceChild(newText,e);
\r
246 if (e.attribute("pdfnavigationtable").equalsIgnoreCase("true")) {
\r
247 node.parentNode().removeChild(node);
\r
251 // Fix up encryption tag
\r
// <en-crypt-temp> is renamed to <en-crypt>; its "value" attribute is removed
// and its former value appended as a text child.
252 if (node.nodeName().equalsIgnoreCase("en-crypt-temp")) {
\r
253 QDomElement e = node.toElement();
\r
254 e.setTagName("en-crypt");
\r
255 String crypt = e.attribute("value");
\r
256 e.removeAttribute("value");
\r
257 QDomText cryptValue = doc.createTextNode(crypt);
\r
258 e.appendChild(cryptValue);
\r
// Convert a link whose "en-tag" attribute equals "en-media" (case-insensitive)
// into an <en-media> element; other nodes pass through the visible code
// untouched.
// NOTE(review): the return statement is not in this listing -- presumably the
// (possibly modified) node is returned; confirm.
263 private QDomNode fixLinkNode(QDomNode node) {
\r
264 QDomElement e = node.toElement();
\r
265 String enTag = e.attribute("en-tag");
\r
266 if (enTag.equalsIgnoreCase("en-media")) {
\r
267 e.setTagName("en-media");
\r
268 e.removeAttribute("en-type");
\r
269 e.removeAttribute("en-tag");
\r
270 e.removeAttribute("en-new");
\r
// Remember the guid in `resources` before the attribute is removed.
271 resources.add(e.attribute("guid"));
\r
272 e.removeAttribute("href");
\r
273 e.removeAttribute("guid");
\r
274 e.setNodeValue("");
\r
// Drops only the first child element; any further children stay in place.
275 e.removeChild(e.firstChildElement());
\r
281 // Return old resources we've found
\r
282 public List<String> getResources() {
\r