OSDN Git Service

Correct JTidy to input text in UTF-8 to prevent some characters from being garbled.
author Randy Baumgarte <randy@fbn.cx>
Fri, 1 Oct 2010 11:38:52 +0000 (07:38 -0400)
committer Randy Baumgarte <randy@fbn.cx>
Fri, 1 Oct 2010 16:44:37 +0000 (12:44 -0400)
src/cx/fbn/nevernote/evernote/EnmlConverter.java

index 3f0ae8a..ab224de 100644 (file)
@@ -27,6 +27,9 @@ import java.util.List;
 import org.w3c.tidy.Tidy;\r
 import org.w3c.tidy.TidyMessage;\r
 \r
+import com.trolltech.qt.core.QByteArray;\r
+import com.trolltech.qt.core.QTextCodec;\r
+\r
 import cx.fbn.nevernote.Global;\r
 import cx.fbn.nevernote.utilities.ApplicationLogger;\r
 import cx.fbn.nevernote.xml.XMLCleanup;\r
@@ -127,12 +130,21 @@ public class EnmlConverter {
                tidy.setMessageListener(tidyListener);\r
                tidy.getStderr().close();  // the listener will capture messages\r
                tidy.setXmlTags(true);\r
-               byte html[] = newContent.getBytes();\r
-               ByteArrayInputStream is = new ByteArrayInputStream(html);\r
-               ByteArrayOutputStream os = new ByteArrayOutputStream();\r
+               \r
+               QTextCodec codec;\r
+               codec = QTextCodec.codecForName("UTF-8");\r
+        QByteArray unicode =  codec.fromUnicode(newContent);\r
+        \r
+//             byte html[] = newContent.getBytes();\r
+//             ByteArrayInputStream is = new ByteArrayInputStream(html);\r
+\r
+               ByteArrayInputStream is = new ByteArrayInputStream(unicode.toByteArray());\r
+        ByteArrayOutputStream os = new ByteArrayOutputStream();\r
+        tidy.setInputEncoding("UTF-8");\r
+//        tidy.setOutputEncoding("UTF-8");\r
                tidy.parse(is, os);\r
                newContent = os.toString();\r
-               \r
+//             newContent = new QByteArray(codec.fromUnicode(os.toString())).toString();\r
                if (tidyListener.errorFound) {\r
                        logger.log(logger.LOW, "Note Contents Begin");\r
                        logger.log(logger.LOW, content);\r