OSDN Git Service

Merge remote-tracking branch 'origin/develop'
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / xml / XMLCleanup.java
1 /*\r
2  * This file is part of NixNote/NeighborNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 \r
20 \r
21 package cx.fbn.nevernote.xml;\r
22 \r
23 import java.util.ArrayList;\r
24 import java.util.List;\r
25 \r
26 import com.trolltech.qt.xml.QDomDocument;\r
27 import com.trolltech.qt.xml.QDomElement;\r
28 import com.trolltech.qt.xml.QDomNode;\r
29 import com.trolltech.qt.xml.QDomNodeList;\r
30 import com.trolltech.qt.xml.QDomText;\r
31 \r
32 import cx.fbn.nevernote.Global;\r
33 \r
34 public class XMLCleanup {\r
35         private String content;\r
36         private QDomDocument doc;\r
37         private final List<String> resources;\r
38         \r
39         public XMLCleanup() {\r
40                 resources = new ArrayList<String>();\r
41         }\r
42         \r
43         \r
44         public void setValue(String text) {\r
45                 content = text;\r
46         }\r
47         public String getValue() {\r
48                 return content;\r
49         }\r
50         // Validate the contents of the note.  Change unsupported things        \r
51         public void validate() {\r
52                 doc = new QDomDocument();\r
53                 int br = content.lastIndexOf("</en-note>");\r
54                 content = new String(content.substring(0,br));\r
55                 String newContent;\r
56                 int k = content.indexOf("<en-note");\r
57 \r
58                 \r
59                 newContent = new String(content.substring(k));\r
60                 \r
61                 \r
62                 // Fix the background color\r
63                 \r
64 \r
65                 newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \r
66                                         +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"\r
67                                         +newContent \r
68                                         +"</en-note>";\r
69 \r
70                 QDomDocument.Result result = doc.setContent(newContent);\r
71                 if (!result.success) {\r
72                         System.out.println("DOM error in XMLValidator.validate()");\r
73                         System.out.println(newContent);\r
74                         System.out.println("Location : Line-"+result.errorLine +" Column-" + result.errorColumn);\r
75                         System.out.println("Exiting");\r
76                         content = null;\r
77                         return;\r
78                 }\r
79                 \r
80                 QDomNodeList noteAnchors = doc.elementsByTagName("en-note");\r
81                 int noteCount = noteAnchors.length();\r
82                 for (int i=noteCount-1; i>=0; i--) {\r
83                         if (noteAnchors.at(i).toElement().hasAttribute("style")) {\r
84                                 String style = noteAnchors.at(i).toElement().attribute("style");\r
85                                 int startColor = style.indexOf("background-color:");\r
86                                 if (startColor > -1) {\r
87                                         String color = style.substring(startColor+17);\r
88                                         color = color.substring(0,color.indexOf(";"));\r
89                                         noteAnchors.at(i).toElement().setAttribute("bgcolor", color);\r
90                                 }\r
91                         }\r
92                 }\r
93                 \r
94                 // Scan through tags node by node\r
95                 scanTags();\r
96                 \r
97                 // Scan again making sure we didn't miss any <a> tags.  Sometimes we do\r
98                 QDomNodeList anchorList = doc.elementsByTagName("a");\r
99                 int anchorCount = anchorList.length();\r
100                 for (int i=anchorCount-1; i>=0; i--) {\r
101                         QDomNode link = anchorList.at(i);\r
102                         link = fixLinkNode(link);\r
103                 }\r
104                 \r
105                 // Remove invalid elements & attributes\r
106                 // Modify en-media tags\r
107                 QDomNodeList anchors;\r
108                 for (String key : Global.invalidAttributes.keySet()) {\r
109                         anchors = doc.elementsByTagName(key);\r
110                         int enMediaCount = anchors.length();\r
111                         for (int i=enMediaCount-1; i>=0; i--) {\r
112                                 QDomElement element = anchors.at(i).toElement();\r
113                                 ArrayList<String> names = Global.invalidAttributes.get(element.nodeName().toLowerCase());\r
114                                 if (names != null) {    \r
115                                         for (int j=0; j<names.size(); j++) {\r
116                                                 element.removeAttribute(names.get(j));\r
117                                         }\r
118                                 }\r
119                         }\r
120                 }\r
121 \r
122                 List<String> elements = Global.invalidElements;\r
123                 for (int j=0; j<elements.size(); j++) {\r
124                         anchors = doc.elementsByTagName(elements.get(j));\r
125                         int enMediaCount = anchors.length();\r
126                         for (int i=enMediaCount-1; i>=0; i--) {\r
127                                 QDomElement element = anchors.at(i).toElement();\r
128                                 element.setTagName("span");\r
129                         }\r
130                 }\r
131                 // Replace the XML carrage returns that the toString() creates.\r
132                 content = doc.toString().replace( "&#xd;", "" );\r
133 \r
134         }\r
135         // Start looking through the tree.\r
136         private void scanTags() {       \r
137                 \r
138                 if (doc.hasChildNodes())\r
139                         parseNodes(doc.childNodes());\r
140                 return;\r
141         }\r
142         \r
143         private void parseNodes(QDomNodeList nodes) {\r
144                 for (int i=0; i<nodes.size(); i++) {\r
145                         QDomNode node = nodes.at(i);\r
146                         if (node.hasChildNodes())\r
147                                 parseNodes(node.childNodes());\r
148                         fixNode(node);\r
149                 }\r
150         }\r
151         \r
152 \r
153         // Fix the contents of the node back to ENML.\r
154         private void fixNode(QDomNode node) {\r
155                 QDomElement scanChecked = node.toElement();\r
156                 if (scanChecked.hasAttribute("checked")) {\r
157                         System.out.println(scanChecked.attribute("checked"));\r
158                         if (!scanChecked.attribute("checked").equalsIgnoreCase("true"))\r
159                                 scanChecked.setAttribute("checked", "false");\r
160                 }\r
161                 if (node.nodeName().equalsIgnoreCase("#comment") || node.nodeName().equalsIgnoreCase("script")) {\r
162                         node.parentNode().removeChild(node);\r
163                 }\r
164                 if (node.nodeName().equalsIgnoreCase("input")) {\r
165                         QDomElement e = node.toElement();\r
166                         e.setTagName("en-todo");\r
167                         String value = e.attribute("value");\r
168                         if (value.trim().equals(""))\r
169                                 value = "false";\r
170                         e.removeAttribute("value");\r
171                         e.removeAttribute("unchecked");\r
172                         e.setAttribute("checked", value);\r
173                         e.removeAttribute("onclick");\r
174                         e.removeAttribute("onmouseover");\r
175                         e.removeAttribute("type");\r
176                 }\r
177 \r
178                 if (node.nodeName().equalsIgnoreCase("a")) {\r
179                         node = fixLinkNode(node);\r
180                 }\r
181                 // Restore image resources\r
182                 if (node.nodeName().equalsIgnoreCase("img")) {\r
183                         QDomElement e = node.toElement();\r
184                         String enType = e.attribute("en-tag");\r
185                         \r
186                         // Check if we have an en-crypt tag.  Change it from an img to en-crypt\r
187                         if (enType.equalsIgnoreCase("en-crypt")) {\r
188                                 \r
189                                 String encrypted = e.attribute("alt");\r
190                                 \r
191                                 QDomText crypt = doc.createTextNode(encrypted);\r
192                                 e.appendChild(crypt);\r
193                                 \r
194                                 e.removeAttribute("v:shapes");\r
195                                 e.removeAttribute("en-tag");\r
196                                 e.removeAttribute("contenteditable");\r
197                                 e.removeAttribute("alt");\r
198                                 e.removeAttribute("src");\r
199                                 e.removeAttribute("id");\r
200                                 e.removeAttribute("onclick");\r
201                                 e.removeAttribute("onmouseover");\r
202                                 e.setTagName("en-crypt");\r
203                                 node.removeChild(e);\r
204                                 return;\r
205                         }\r
206 \r
207                         // Check if we have a LaTeX image.  Remove the parent link tag\r
208                         if (enType.equalsIgnoreCase("en-latex")) {\r
209                                 enType = "en-media";\r
210                                 QDomNode parent = e.parentNode();\r
211                                 parent.removeChild(e);\r
212                                 parent.parentNode().replaceChild(e, parent);\r
213                         }\r
214                         \r
215                         // If we've gotten this far, we have an en-media tag\r
216                         e.setTagName(enType);\r
217                         resources.add(e.attribute("guid"));\r
218                         e.removeAttribute("guid");\r
219                         e.removeAttribute("src");\r
220                         e.removeAttribute("en-new");\r
221                         e.removeAttribute("en-tag");\r
222                 }\r
223                 \r
224                 // Tags like <ul><ul><li>1</li></ul></ul> are technically valid, but Evernote \r
225                 // expects that a <ul> tag only has a <li>, so we will need to change them\r
226                 // to this:  <ul><li><ul><li>1</li></ul></li></ul>\r
227                 if (node.nodeName().equalsIgnoreCase("ul")) {\r
228                         QDomNode firstChild = node.firstChild();\r
229                         QDomElement childElement = firstChild.toElement();\r
230                         if (childElement.nodeName().equalsIgnoreCase("ul")) {\r
231                                 QDomElement newElement = doc.createElement("li");\r
232                                 node.insertBefore(newElement, firstChild);\r
233                                 node.removeChild(firstChild);\r
234                                 newElement.appendChild(firstChild);\r
235                         }\r
236                 }\r
237                 \r
238                 if (node.nodeName().equalsIgnoreCase("en-hilight")) {\r
239                         QDomElement e = node.toElement();\r
240                         QDomText newText = doc.createTextNode(e.text());\r
241                         e.parentNode().replaceChild(newText,e);\r
242                 }\r
243                 if (node.nodeName().equalsIgnoreCase("span")) {\r
244                         QDomElement e = node.toElement();\r
245                         if (e.attribute("class").equalsIgnoreCase("en-hilight") || e.attribute("class").equalsIgnoreCase("en-spell")) {\r
246                                 QDomText newText = doc.createTextNode(e.text());\r
247                                 e.parentNode().replaceChild(newText,e);\r
248                         }\r
249                         if (e.attribute("pdfnavigationtable").equalsIgnoreCase("true")) {\r
250                                 node.parentNode().removeChild(node);\r
251                         }\r
252                 }\r
253                 \r
254                 // Fix up encryption tag\r
255                 if (node.nodeName().equalsIgnoreCase("en-crypt-temp")) {\r
256                         QDomElement e = node.toElement();\r
257                         e.setTagName("en-crypt");\r
258                         String crypt = e.attribute("value");\r
259                         e.removeAttribute("value");\r
260                         QDomText cryptValue = doc.createTextNode(crypt);\r
261                         e.appendChild(cryptValue);\r
262                 }\r
263         }\r
264 \r
265         \r
266         private QDomNode fixLinkNode(QDomNode node) {\r
267                 QDomElement e = node.toElement();\r
268                 String enTag = e.attribute("en-tag");\r
269                 if (enTag.equalsIgnoreCase("en-media")) {\r
270                         e.setTagName("en-media");\r
271                         e.removeAttribute("en-type");\r
272                         e.removeAttribute("en-tag");\r
273                         e.removeAttribute("en-new");\r
274                         resources.add(e.attribute("guid"));\r
275                         e.removeAttribute("href");\r
276                         e.removeAttribute("guid");\r
277                         e.setNodeValue("");\r
278                         e.removeChild(e.firstChildElement());\r
279                 }\r
280                 return e;\r
281         }\r
282 \r
283         \r
284         // Return old resources we've found\r
285         public List<String> getResources() {\r
286                 return resources;\r
287         }\r
288 \r
289 }\r
290 \r
291