OSDN Git Service

Cleanup compiler warning messages and alter backup & restore to handle new database...
[neighbornote/NeighborNote.git] / src / cx / fbn / nevernote / evernote / EnmlConverter.java
1 /*\r
2  * This file is part of NeverNote \r
3  * Copyright 2009 Randy Baumgarte\r
4  * \r
5  * This file may be licensed under the terms of of the\r
6  * GNU General Public License Version 2 (the ``GPL'').\r
7  *\r
8  * Software distributed under the License is distributed\r
9  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either\r
10  * express or implied. See the GPL for the specific language\r
11  * governing rights and limitations.\r
12  *\r
13  * You should have received a copy of the GPL along with this\r
14  * program. If not, go to http://www.gnu.org/licenses/gpl.html\r
15  * or write to the Free Software Foundation, Inc.,\r
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\r
17  *\r
18 */\r
19 package cx.fbn.nevernote.evernote;\r
20 \r
21 import java.io.ByteArrayInputStream;\r
22 import java.io.ByteArrayOutputStream;\r
23 import java.io.File;\r
24 import java.util.ArrayList;\r
25 import java.util.List;\r
26 \r
27 import org.w3c.tidy.Tidy;\r
28 import org.w3c.tidy.TidyMessage;\r
29 \r
30 import com.trolltech.qt.core.QByteArray;\r
31 import com.trolltech.qt.core.QTextCodec;\r
32 \r
33 import cx.fbn.nevernote.Global;\r
34 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
35 import cx.fbn.nevernote.utilities.Pair;\r
36 import cx.fbn.nevernote.xml.XMLCleanup;\r
37 import cx.fbn.nevernote.xml.XMLNoteRepair;\r
38 \r
39 public class EnmlConverter {\r
40         private final ApplicationLogger logger;\r
41         private List<String>                    resources;\r
42         public boolean saveInvalidXML;\r
43         \r
44         private class TidyListener implements org.w3c.tidy.TidyMessageListener {\r
45                 \r
46                 ApplicationLogger logger;\r
47                 public boolean errorFound; \r
48                 \r
49                 public TidyListener(ApplicationLogger logger) {\r
50                         this.logger = logger;\r
51                         errorFound = false;\r
52                 }\r
53                 @Override\r
54                 public void messageReceived(TidyMessage msg) {\r
55                         if (msg.getLevel() == TidyMessage.Level.ERROR) {\r
56                                 logger.log(logger.LOW, "******* JTIDY ERORR *******");\r
57                                 logger.log(logger.LOW, "Error Code: " +msg.getErrorCode());\r
58                                 logger.log(logger.LOW, "Column: " +msg.getColumn());\r
59                                 logger.log(logger.LOW, "Column: " +msg.getColumn());\r
60                                 logger.log(logger.LOW, "Line: " +msg.getLine());\r
61                                 logger.log(logger.LOW, "Message: " +msg.getMessage());\r
62                                 logger.log(logger.LOW, "***************************");\r
63                                 errorFound = true;\r
64                         } else \r
65                                 logger.log(logger.EXTREME, "JTidy Results: "+msg.getMessage());\r
66                 }\r
67                 \r
68         }\r
69         \r
70         public EnmlConverter(ApplicationLogger l) {\r
71                 logger = l;\r
72 //              conn = c;\r
73                 saveInvalidXML = false;\r
74                 resources = new ArrayList<String>();\r
75         }\r
76 \r
77         public List<String> getResources() {\r
78                 return resources;\r
79         }\r
80         public String convert(String noteGuid, String content) {\r
81                 logger.log(logger.HIGH, "Entering DBRunner.convertToEnml");\r
82                 logger.log(logger.EXTREME, "Note Text:" +content);\r
83                 \r
84                 // Replace the en-note tags with body tags in case we came from \r
85                 // someplace other than the editor (for example, if we are merging notes).\r
86                 content = content.replace("<en-note>", "<body>");\r
87                 content = content.replace("</en-note>", "</body>");\r
88                 // Start removing stuff we don't need or want\r
89                 int br = content.lastIndexOf("</body>");\r
90                 if (br > 0)\r
91                         content = new String(content.substring(0,br));\r
92                 String newContent;\r
93                 int k = content.indexOf("<body");\r
94                 if (k>-1)\r
95                         newContent = new String(content.substring(k));\r
96                 else\r
97                         newContent = "<body>"+content;\r
98 \r
99                 \r
100                 // Check that we have a vaild header.  Normally we should not\r
101                 // but sometimes it seems that we can.  I don't see how, but it is\r
102                 // easy enough to check.\r
103                 if (!newContent.startsWith("<?xml"))\r
104                         newContent = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \r
105                                 +"<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">\n"\r
106                                 +newContent \r
107                                 +"</body>";\r
108                 \r
109 \r
110                 // Fix the more common XML problems that Webkit creates, but are not considered \r
111                 // valid XML.\r
112                 newContent = fixStupidXMLProblems(newContent);\r
113                 \r
114                 \r
115                 // Change the contents to have enml instead of body tags or\r
116                 // we'll fail validation later.\r
117                 newContent = newContent.replace("<body", "<en-note");\r
118                 newContent = newContent.replace("</body>", "</en-note>");\r
119                 \r
120                 // First pass through the data.  The goal of this pass is to \r
121                 // validate that we have a good XML document and to repair\r
122                 // any problems found.\r
123                 \r
124                 XMLNoteRepair repair = new XMLNoteRepair();\r
125 //              logger.log(logger.HIGH, "Checking XML Structure");\r
126 //              newContent = repair.parse(newContent, false);\r
127 //              logger.log(logger.HIGH, "Check complete");\r
128         \r
129         logger.log(logger.HIGH, "Fixing encryption tags");\r
130         newContent = fixEncryptionTags(newContent);\r
131                 \r
132                 Tidy tidy = new Tidy();\r
133                 TidyListener tidyListener = new TidyListener(logger);\r
134                 tidy.setMessageListener(tidyListener);\r
135                 tidy.getStderr().close();  // the listener will capture messages\r
136                 tidy.setXmlTags(true);\r
137                 \r
138                 QTextCodec codec;\r
139                 codec = QTextCodec.codecForName("UTF-8");\r
140         QByteArray unicode =  codec.fromUnicode(newContent);\r
141         \r
142 //              byte html[] = newContent.getBytes();\r
143 //              ByteArrayInputStream is = new ByteArrayInputStream(html);\r
144         logger.log(logger.HIGH, "Starting JTidy check");\r
145         logger.log(logger.EXTREME, "Start of JTidy Input");\r
146         logger.log(logger.EXTREME, newContent);\r
147         logger.log(logger.EXTREME, "End Of JTidy Input");\r
148                 ByteArrayInputStream is = new ByteArrayInputStream(unicode.toByteArray());\r
149         ByteArrayOutputStream os = new ByteArrayOutputStream();\r
150         tidy.setInputEncoding("UTF-8");\r
151                 tidy.parse(is, os);\r
152                 String tidyContent = os.toString();\r
153                 if (tidyListener.errorFound) {\r
154                         logger.log(logger.LOW, "Note Contents Begin");\r
155                         logger.log(logger.LOW, content);\r
156                         logger.log(logger.LOW, "Note Contents End");\r
157                         tidyContent = null;\r
158                 } else {\r
159                         if (newContent.trim().equals(""))\r
160                                 tidyContent = null;\r
161                 }\r
162 \r
163                 // If the repair above returned null, then the XML is foobar.\r
164                 // We are done here.\r
165                 if (tidyContent != null) {\r
166                         newContent = tidyContent;\r
167                 } else {\r
168                         // Houston, we've had a problem.  Fall back to old method\r
169                         logger.log(logger.HIGH, "Error converting to JTidy.  Falling back to old method");\r
170                         String repairedContent = repair.parse(newContent, false);\r
171                         if (repairedContent == null) {\r
172                                 logger.log(logger.EXTREME, "Null returned from repair.parse()");\r
173                                 logger.log(logger.LOW, "Parse error when converting to ENML. Aborting save");\r
174                                 return null;\r
175                         }\r
176                         newContent = repairedContent;\r
177                         logger.log(logger.EXTREME, "Start of repaired content");\r
178                         logger.log(logger.EXTREME, repairedContent);\r
179                         logger.log(logger.EXTREME, "End of repaired content");\r
180                 }\r
181                 \r
182                 // Second pass through the data.  The goal of this pass is to \r
183                 // remove any things we added in NeverNote that do not match\r
184                 // the ENML schema\r
185                 XMLCleanup v = new XMLCleanup();\r
186                 v.setValue(newContent);\r
187                 logger.log(logger.HIGH, "Beginning ENML Cleanup");\r
188                 v.validate();\r
189                 logger.log(logger.HIGH, "Cleanup complete.");\r
190                 \r
191         \r
192                         \r
193                 // Final pass through the data.  In this one we\r
194                 // remove any invalid attributes and to save the\r
195                 // new resources.\r
196                 logger.log(logger.EXTREME, "Rebuilt ENML:");\r
197                 logger.log(logger.EXTREME, v.getValue());       \r
198                 logger.log(logger.EXTREME, "End Of Rebuilt ENML:");\r
199                 resources = v.getResources();\r
200 \r
201                 \r
202                 // The XML has the dtd to validate set against Evernote's web\r
203                 // address.  We change it to a local one because otherwise it would\r
204                 // fail if the user doesn't have internet connectivity.  The local copy\r
205                 // also contains the 3 other PUBLIC definitions at the beginning of the dtd.\r
206                 newContent = v.getValue();\r
207                 File dtdFile = Global.getFileManager().getXMLDirFile("enml2.dtd");\r
208                 String dtd = dtdFile.toURI().toString();\r
209                 newContent = newContent.replace("<!DOCTYPE en-note SYSTEM \'http://xml.evernote.com/pub/enml2.dtd'>", \r
210                                 "<!DOCTYPE en-note SYSTEM \"" +dtd +"\">");\r
211                 \r
212                 logger.log(logger.HIGH, "Validating ENML");\r
213                 String repairedContent = repair.parse(newContent, true);\r
214                 if (repairedContent == null)\r
215                         logger.log(logger.EXTREME, "Null returned from repair.parse()");\r
216                 else\r
217                         newContent = repairedContent;\r
218                 logger.log(logger.HIGH, "Validation complete");\r
219                 saveInvalidXML = repair.saveInvalidXML;\r
220                 \r
221                 // Restore the correct XML header.\r
222                 newContent = newContent.replace("<!DOCTYPE en-note SYSTEM \"" +dtd +"\">", \r
223                                 "<!DOCTYPE en-note SYSTEM 'http://xml.evernote.com/pub/enml2.dtd'>");\r
224                 \r
225                 \r
226                 logger.log(logger.EXTREME, "Leaving ENMLConverter.convert()");\r
227                 return newContent;\r
228         }\r
229 \r
230         \r
231         private String fixEncryptionTags(String content) {\r
232                 // Fix the problem that the document body isn't properly closed\r
233                 String newContent = new String(content);\r
234                 logger.log(logger.MEDIUM, "Inside EnmlConverter.fixEncryptionTags");\r
235                 logger.log(logger.EXTREME, content);\r
236                 \r
237                 // Fix the problem that the img tag isn't properly closed\r
238                 int endPos, startPos, endData,slotStart, slotEnd;\r
239                 logger.log(logger.MEDIUM, "Checking table encryption tags");\r
240                 String eTag = "<table class=\"en-crypt-temp\"";\r
241                 for (int i=newContent.indexOf(eTag); i>0; i = newContent.indexOf(eTag,i+1)) {\r
242                         slotStart = newContent.indexOf("slot", i+1)+6;\r
243                         slotEnd = newContent.indexOf("\"",slotStart);\r
244                         String slot = newContent.substring(slotStart, slotEnd);\r
245                         startPos = newContent.indexOf("<td>", i+1)+4;\r
246                         endData = newContent.indexOf("</td>",startPos);\r
247                         String text = newContent.substring(startPos,endData);\r
248                         endPos = newContent.indexOf("</table>",i+1)+8;\r
249                         // Encrypt the text\r
250                         Pair<String,String> pair = Global.passwordSafe.get(slot);\r
251                         String password = pair.getFirst();\r
252                         String hint = pair.getSecond();\r
253                         EnCrypt crypt = new EnCrypt(); \r
254                         String encrypted = crypt.encrypt(text, password, 64); \r
255 \r
256                         // replace the table with an en-crypt tag.\r
257                         newContent = newContent.substring(0,i-1) + \r
258                                 "<en-crypt-temp cipher=\"RC2\" length=\"64\" hint=\""+\r
259                                 hint +"\" value=\""+\r
260                                 encrypted +\r
261                                 "\" />" +\r
262                                 newContent.substring(endPos);\r
263                 }\r
264                 \r
265                 return newContent;\r
266         }\r
267         \r
268         // Fix XML problems that Qt can't deal with\r
269         public String fixStupidXMLProblems(String content) {\r
270                 logger.log(logger.HIGH, "Entering DBRunner.fixStupidXMLProblems");\r
271 \r
272                 // Fix the problem that the document body isn't properly closed\r
273                 String newContent = new String(content);\r
274                 logger.log(logger.MEDIUM, "Inside fixStupidXMLProblems.  Old content:");\r
275                 logger.log(logger.EXTREME, content);\r
276                 \r
277                 // Fix the problem that the img tag isn't properly closed\r
278                 int endPos;\r
279                 logger.log(logger.MEDIUM, "Checking img tags");\r
280                 for (int i=newContent.indexOf("<img"); i>0; i = newContent.indexOf("<img",i+1)) {\r
281                         endPos = newContent.indexOf(">",i+1);\r
282                         String end = newContent.substring(endPos+1);\r
283                         newContent = newContent.subSequence(0,endPos) +"/>"+end;\r
284                 }\r
285                 \r
286                 // Fix the problem that the input tag isn't properly closed\r
287                 logger.log(logger.MEDIUM, "Checking input tags");\r
288                 for (int i=newContent.indexOf("<input"); i>0; i = newContent.indexOf("<input",i+1)) {\r
289                         endPos = newContent.indexOf(">",i+1);\r
290                         String end = newContent.substring(endPos+1);\r
291                         newContent = newContent.subSequence(0,endPos) +"/>"+end;\r
292                 }\r
293                 \r
294                 \r
295                 // Fix the problem that the <br> tag isn't properly closed\r
296                 logger.log(logger.MEDIUM, "Checking br tags");\r
297                 for (int i=newContent.indexOf("<br"); i>0; i = newContent.indexOf("<br",i+1)) {\r
298                         endPos = newContent.indexOf(">",i+1);\r
299                         String end = newContent.substring(endPos+1);\r
300                         newContent = newContent.subSequence(0,endPos) +"/>"+end;\r
301                 }\r
302                         \r
303                 // Fix the problem that the <hr> tag isn't properly closed\r
304                 logger.log(logger.MEDIUM, "Checking hr tags");\r
305                 for (int i=newContent.indexOf("<hr"); i>0; i = newContent.indexOf("<hr",i+1)) {\r
306                         endPos = newContent.indexOf(">",i+1);\r
307                         String end = newContent.substring(endPos+1);\r
308                         newContent = newContent.subSequence(0,endPos) +"/>"+end;\r
309                 }\r
310                 \r
311                 logger.log(logger.MEDIUM, "Leaving fixStupidXMLProblems");\r
312                 logger.log(logger.HIGH, "Leaving DBRunner.fixStupidXMLProblems");\r
313                 return newContent.toString();\r
314         }\r
315 \r
316 \r
317         // Fix XML that Evernote thinks is invalid\r
318         public String fixEnXMLCrap(String note) {\r
319                 logger.log(logger.EXTREME, "Entering EnmlConverter.fixEnXMLCrap");\r
320                 if (note == null)\r
321                         return null;\r
322                 \r
323                 int pos;\r
324                 StringBuffer buffer = new StringBuffer(note);\r
325                 \r
326                 logger.log(logger.EXTREME, "Converting <b/>");\r
327                 // change all <b/> to <b></b> because Evernote hates them if they happen in <span>\r
328                 pos = buffer.indexOf("<b/>");\r
329                 for (; pos>-1; ) {\r
330                         buffer.replace(pos, pos+4, "<b></b>");\r
331                         pos = buffer.indexOf("<b/>",pos);\r
332                 }\r
333                 // change all <br/> to <br></br> because Evernote hates them if they happen in <span>\r
334                 logger.log(logger.EXTREME, "converting <br/>");\r
335                 pos = buffer.indexOf("<br/>");\r
336                 for (; pos>-1; ) {\r
337                         buffer.replace(pos, pos+5, "<br></br>");\r
338                         pos = buffer.indexOf("<br/>",pos);\r
339                 }\r
340                 \r
341                 // change all <span> elements in lists because Evernote hates them if they happen \r
342                 int endPos = 0;\r
343                 int spanPos;\r
344                 pos = buffer.indexOf("<li>");\r
345                 spanPos = buffer.indexOf("<span>");\r
346                 // Get rid of empty spans in <li> elements\r
347                 pos = buffer.indexOf("<li>");\r
348                 spanPos = buffer.indexOf("<span/>");\r
349                 for (; pos>-1 && spanPos >-1;) {\r
350                         endPos = buffer.indexOf("</li>",pos);\r
351                         if (spanPos > pos && spanPos < endPos) {\r
352                                 buffer.replace(spanPos,spanPos+7,"");\r
353                         }\r
354                         pos=buffer.indexOf("<li>",pos+1);\r
355                         spanPos = buffer.indexOf("<span/>",spanPos);\r
356                 }\r
357                 \r
358                 logger.log(logger.EXTREME, "Leaving EnmlConverter.fixEnXMLCrap");\r
359                 return buffer.toString();\r
360         }\r
361         \r
362         // Fix stupid en-media problems\r
363         public String fixEnMediaCrap(String note) {\r
364                 if (note == null)\r
365                         return null;\r
366                 \r
367                 StringBuffer buffer = new StringBuffer(note);\r
368                 // get rid of any </en-media> tags since they shouldn't exist.\r
369                 int pos = buffer.indexOf("</en-media>");\r
370                 for (; pos>-1; ) {\r
371                         buffer.replace(pos, pos+11, "");\r
372                         pos = buffer.indexOf("</en-media>",pos);\r
373                 }\r
374                 \r
375                 \r
376                 // Make sure we have a proper /> ending the en-media tag\r
377                 pos = buffer.indexOf("<en-media");\r
378                 for (; pos>-1; ) {\r
379                         pos=buffer.indexOf(">", pos);\r
380                         if (!buffer.substring(pos-1,pos).equals("/"))\r
381                         buffer.replace(pos, pos+1, " />");\r
382                         pos = buffer.indexOf("<en-media",pos);\r
383                 }\r
384                 \r
385                 return buffer.toString();\r
386         }\r
387 }\r