OSDN Git Service

29584f097e79ab5f4cb530ab7787045e9aade746
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / xml / XMLCleanup.java
1 /*\r
2  * This file is part of NixNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 \r
21 package cx.fbn.nevernote.xml;\r
22 \r
23 import java.util.ArrayList;\r
24 import java.util.List;\r
25 \r
26 import com.trolltech.qt.xml.QDomDocument;\r
27 import com.trolltech.qt.xml.QDomElement;\r
28 import com.trolltech.qt.xml.QDomNode;\r
29 import com.trolltech.qt.xml.QDomNodeList;\r
30 import com.trolltech.qt.xml.QDomText;\r
31 \r
32 import cx.fbn.nevernote.Global;\r
33 \r
34 public class XMLCleanup {\r
35         private String content;\r
36         private QDomDocument doc;\r
37         private final List<String> resources;\r
38         \r
39         public XMLCleanup() {\r
40                 resources = new ArrayList<String>();\r
41         }\r
42         \r
43         \r
44         public void setValue(String text) {\r
45                 content = text;\r
46         }\r
47         public String getValue() {\r
48                 return content;\r
49         }\r
50         // Validate the contents of the note.  Change unsupported things        \r
51         public void validate() {\r
52                 doc = new QDomDocument();\r
53                 int br = content.lastIndexOf("</en-note>");\r
54                 content = new String(content.substring(0,br));\r
55                 String newContent;\r
56                 int k = content.indexOf("<en-note");\r
57 \r
58                 \r
59                 newContent = new String(content.substring(k));\r
60                 \r
61                 \r
62                 // Fix the background color\r
63                 \r
64 \r
65                 newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \r
66                                         +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"\r
67                                         +newContent \r
68                                         +"</en-note>";\r
69 \r
70                 QDomDocument.Result result = doc.setContent(newContent);\r
71                 if (!result.success) {\r
72                         System.out.println("DOM error in XMLValidator.validate()");\r
73                         System.out.println(newContent);\r
74                         System.out.println("Location : Line-"+result.errorLine +" Column-" + result.errorColumn);\r
75                         System.out.println("Exiting");\r
76                         content = null;\r
77                         return;\r
78                 }\r
79                 \r
80                 QDomNodeList noteAnchors = doc.elementsByTagName("en-note");\r
81                 int noteCount = noteAnchors.length();\r
82                 for (int i=noteCount-1; i>=0; i--) {\r
83                         if (noteAnchors.at(i).toElement().hasAttribute("style")) {\r
84                                 String style = noteAnchors.at(i).toElement().attribute("style");\r
85                                 int startColor = style.indexOf("background-color:");\r
86                                 if (startColor > -1) {\r
87                                         String color = style.substring(startColor+17);\r
88                                         color = color.substring(0,color.indexOf(";"));\r
89                                         noteAnchors.at(i).toElement().setAttribute("bgcolor", color);\r
90                                 }\r
91                         }\r
92                 }\r
93                 \r
94                 // Scan through tags node by node\r
95                 scanTags();\r
96                 \r
97                 // Scan again making sure we didn't miss any <a> tags.  Sometimes we do\r
98                 QDomNodeList anchorList = doc.elementsByTagName("a");\r
99                 int anchorCount = anchorList.length();\r
100                 for (int i=anchorCount-1; i>=0; i--) {\r
101                         QDomNode link = anchorList.at(i);\r
102                         link = fixLinkNode(link);\r
103                 }\r
104                 \r
105                 // Remove invalid elements & attributes\r
106                 // Modify en-media tags\r
107                 QDomNodeList anchors;\r
108                 for (String key : Global.invalidAttributes.keySet()) {\r
109                         anchors = doc.elementsByTagName(key);\r
110                         int enMediaCount = anchors.length();\r
111                         for (int i=enMediaCount-1; i>=0; i--) {\r
112                                 QDomElement element = anchors.at(i).toElement();\r
113                                 ArrayList<String> names = Global.invalidAttributes.get(element.nodeName().toLowerCase());\r
114                                 if (names != null) {    \r
115                                         for (int j=0; j<names.size(); j++) {\r
116                                                 element.removeAttribute(names.get(j));\r
117                                         }\r
118                                 }\r
119                         }\r
120                 }\r
121 \r
122                 List<String> elements = Global.invalidElements;\r
123                 for (int j=0; j<elements.size(); j++) {\r
124                         anchors = doc.elementsByTagName(elements.get(j));\r
125                         int enMediaCount = anchors.length();\r
126                         for (int i=enMediaCount-1; i>=0; i--) {\r
127                                 QDomElement element = anchors.at(i).toElement();\r
128                                 element.setTagName("span");\r
129                         }\r
130                 }\r
131                 // Replace the XML carrage returns that the toString() creates.\r
132                 content = doc.toString().replace( "&#xd;", "" );\r
133 \r
134         }\r
135         // Start looking through the tree.\r
136         private void scanTags() {       \r
137                 \r
138                 if (doc.hasChildNodes())\r
139                         parseNodes(doc.childNodes());\r
140                 return;\r
141         }\r
142         \r
143         private void parseNodes(QDomNodeList nodes) {\r
144                 for (int i=0; i<nodes.size(); i++) {\r
145                         QDomNode node = nodes.at(i);\r
146                         if (node.hasChildNodes())\r
147                                 parseNodes(node.childNodes());\r
148                         fixNode(node);\r
149                 }\r
150         }\r
151         \r
152 \r
153         // Fix the contents of the node back to ENML.\r
154         private void fixNode(QDomNode node) {\r
155                 QDomElement scanChecked = node.toElement();\r
156                 if (scanChecked.hasAttribute("checked")) {\r
157                         System.out.println(scanChecked.attribute("checked"));\r
158                         if (!scanChecked.attribute("checked").equalsIgnoreCase("true"))\r
159                                 scanChecked.setAttribute("checked", "false");\r
160                 }\r
161                 if (node.nodeName().equalsIgnoreCase("#comment") || node.nodeName().equalsIgnoreCase("script")) {\r
162                         node.parentNode().removeChild(node);\r
163                 }\r
164                 if (node.nodeName().equalsIgnoreCase("input")) {\r
165                         QDomElement e = node.toElement();\r
166                         e.setTagName("en-todo");\r
167                         String value = e.attribute("value");\r
168                         e.removeAttribute("value");\r
169                         e.removeAttribute("unchecked");\r
170                         e.setAttribute("checked", value);\r
171                         e.removeAttribute("onclick");\r
172                         e.removeAttribute("type");\r
173                 }\r
174 \r
175                 if (node.nodeName().equalsIgnoreCase("a")) {\r
176                         node = fixLinkNode(node);\r
177                 }\r
178                 // Restore image resources\r
179                 if (node.nodeName().equalsIgnoreCase("img")) {\r
180                         QDomElement e = node.toElement();\r
181                         String enType = e.attribute("en-tag");\r
182                         \r
183                         // Check if we have an en-crypt tag.  Change it from an img to en-crypt\r
184                         if (enType.equalsIgnoreCase("en-crypt")) {\r
185                                 \r
186                                 String encrypted = e.attribute("alt");\r
187                                 \r
188                                 QDomText crypt = doc.createTextNode(encrypted);\r
189                                 e.appendChild(crypt);\r
190                                 \r
191                                 e.removeAttribute("v:shapes");\r
192                                 e.removeAttribute("en-tag");\r
193                                 e.removeAttribute("contenteditable");\r
194                                 e.removeAttribute("alt");\r
195                                 e.removeAttribute("src");\r
196                                 e.removeAttribute("id");\r
197                                 e.removeAttribute("onclick");\r
198                                 e.removeAttribute("onmouseover");\r
199                                 e.setTagName("en-crypt");\r
200                                 node.removeChild(e);\r
201                                 return;\r
202                         }\r
203 \r
204                         // Check if we have a LaTeX image.  Remove the parent link tag\r
205                         if (enType.equalsIgnoreCase("en-latex")) {\r
206                                 enType = "en-media";\r
207                                 QDomNode parent = e.parentNode();\r
208                                 parent.removeChild(e);\r
209                                 parent.parentNode().replaceChild(e, parent);\r
210                         }\r
211                         \r
212                         // If we've gotten this far, we have an en-media tag\r
213                         e.setTagName(enType);\r
214                         resources.add(e.attribute("guid"));\r
215                         e.removeAttribute("guid");\r
216                         e.removeAttribute("src");\r
217                         e.removeAttribute("en-new");\r
218                         e.removeAttribute("en-tag");\r
219                 }\r
220                 \r
221                 // Tags like <ul><ul><li>1</li></ul></ul> are technically valid, but Evernote \r
222                 // expects that a <ul> tag only has a <li>, so we will need to change them\r
223                 // to this:  <ul><li><ul><li>1</li></ul></li></ul>\r
224                 if (node.nodeName().equalsIgnoreCase("ul")) {\r
225                         QDomNode firstChild = node.firstChild();\r
226                         QDomElement childElement = firstChild.toElement();\r
227                         if (childElement.nodeName().equalsIgnoreCase("ul")) {\r
228                                 QDomElement newElement = doc.createElement("li");\r
229                                 node.insertBefore(newElement, firstChild);\r
230                                 node.removeChild(firstChild);\r
231                                 newElement.appendChild(firstChild);\r
232                         }\r
233                 }\r
234                 \r
235                 if (node.nodeName().equalsIgnoreCase("en-hilight")) {\r
236                         QDomElement e = node.toElement();\r
237                         QDomText newText = doc.createTextNode(e.text());\r
238                         e.parentNode().replaceChild(newText,e);\r
239                 }\r
240                 if (node.nodeName().equalsIgnoreCase("span")) {\r
241                         QDomElement e = node.toElement();\r
242                         if (e.attribute("class").equalsIgnoreCase("en-hilight") || e.attribute("class").equalsIgnoreCase("en-spell")) {\r
243                                 QDomText newText = doc.createTextNode(e.text());\r
244                                 e.parentNode().replaceChild(newText,e);\r
245                         }\r
246                         if (e.attribute("pdfnavigationtable").equalsIgnoreCase("true")) {\r
247                                 node.parentNode().removeChild(node);\r
248                         }\r
249                 }\r
250                 \r
251                 // Fix up encryption tag\r
252                 if (node.nodeName().equalsIgnoreCase("en-crypt-temp")) {\r
253                         QDomElement e = node.toElement();\r
254                         e.setTagName("en-crypt");\r
255                         String crypt = e.attribute("value");\r
256                         e.removeAttribute("value");\r
257                         QDomText cryptValue = doc.createTextNode(crypt);\r
258                         e.appendChild(cryptValue);\r
259                 }\r
260         }\r
261 \r
262         \r
263         private QDomNode fixLinkNode(QDomNode node) {\r
264                 QDomElement e = node.toElement();\r
265                 String enTag = e.attribute("en-tag");\r
266                 if (enTag.equalsIgnoreCase("en-media")) {\r
267                         e.setTagName("en-media");\r
268                         e.removeAttribute("en-type");\r
269                         e.removeAttribute("en-tag");\r
270                         e.removeAttribute("en-new");\r
271                         resources.add(e.attribute("guid"));\r
272                         e.removeAttribute("href");\r
273                         e.removeAttribute("guid");\r
274                         e.setNodeValue("");\r
275                         e.removeChild(e.firstChildElement());\r
276                 }\r
277                 return e;\r
278         }\r
279 \r
280         \r
281         // Return old resources we've found\r
282         public List<String> getResources() {\r
283                 return resources;\r
284         }\r
285 \r
286 }\r
287 \r
288