package jp.sourceforge.jindolf.parser;
+import io.bitbucket.olyutorskii.jiocema.DecodeBreakException;
+import io.bitbucket.olyutorskii.jiocema.DecodeNotifier;
import java.io.ByteArrayInputStream;
+import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
-import java.util.ArrayList;
import java.util.List;
import org.junit.After;
import org.junit.AfterClass;
/**
*
*/
-public class ContentBuilderUCS2Test {
+public class ContentBuilderTest {
- public ContentBuilderUCS2Test() {
+ public ContentBuilderTest() {
}
@BeforeClass
public void tearDown() {
}
- public static byte[] byteArray(CharSequence seq){
- byte[] result;
-
- List<Byte> byteList = new ArrayList<>();
-
- int length = seq.length();
- for(int pos = 0; pos < length; pos++){
- int val = 0;
-
- char ch = seq.charAt(pos);
-
- if('0' <= ch && ch <= '9'){
- val += ch - '0';
- }else if('a' <= ch && ch <= 'f'){
- val += ch - 'a' + 10;
- }else if('A' <= ch && ch <= 'F'){
- val += ch - 'A' + 10;
- }else{
- continue;
- }
-
- pos++;
- if(pos >= length) break;
-
- val *= 16;
- ch = seq.charAt(pos);
-
- if('0' <= ch && ch <= '9'){
- val += ch - '0';
- }else if('a' <= ch && ch <= 'f'){
- val += ch - 'a' + 10;
- }else if('A' <= ch && ch <= 'F'){
- val += ch - 'A' + 10;
- }else{
- continue;
- }
-
- byteList.add((byte)val);
- }
-
- result = new byte[byteList.size()];
-
- for(int pos = 0; pos < result.length; pos++){
- result[pos] = byteList.get(pos);
- }
-
- return result;
- }
/**
* Test of UTF8
+ * @throws Exception if an unexpected error occurs while decoding the test data
*/
@Test
public void testUTF8() throws Exception {
Charset cs = Charset.forName("UTF-8");
CharsetDecoder cd;
- ContentBuilderUCS2 cb;
- StreamDecoder decoder;
+ ContentBuilder cb;
+ DecodeNotifier decoder;
byte[] bdata;
InputStream is;
DecodedContent content;
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("41:42:43");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("41:42:43");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("41:EFBCA2:43");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("41:EFBCA2:43");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("41:FF:43");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("41:FF:43");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
/**
* Test of UTF16
+ * @throws Exception if an unexpected error occurs while decoding the test data
*/
@Test
public void testUTF16() throws Exception {
Charset cs = Charset.forName("UTF-16");
CharsetDecoder cd;
- ContentBuilderUCS2 cb;
- StreamDecoder decoder;
+ ContentBuilder cb;
+ DecodeNotifier decoder;
byte[] bdata;
InputStream is;
DecodedContent content;
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("0041:0042:0043");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("0041:0042:0043");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("0041:FF22:0043");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("0041:FF22:0043");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
/**
* Test of UTF16 sequence error
+ * @throws Exception if an unexpected error occurs while decoding the test data
*/
@Test
public void testUTF16_seq() throws Exception {
Charset cs = Charset.forName("UTF-16");
CharsetDecoder cd;
- ContentBuilderUCS2 cb;
- StreamDecoder decoder;
+ ContentBuilder cb;
+ DecodeNotifier decoder;
byte[] bdata;
InputStream is;
DecodedContent content;
DecodeErrorInfo einfo;
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("0041:d800:0043:0044");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("0041:d800:0043:0044");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("0041:0042:dc00:0044");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("0041:0042:dc00:0044");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("0041:d800");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("0041:d800");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
/**
* Test of UTF16 mapping error
+ * @throws Exception if an unexpected error occurs while decoding the test data
*/
@Test
public void testUTF16_nomap() throws Exception {
Charset cs = Charset.forName("UTF-16");
CharsetDecoder cd;
- ContentBuilderUCS2 cb;
- StreamDecoder decoder;
+ ContentBuilder cb;
+ DecodeNotifier decoder;
byte[] bdata;
InputStream is;
DecodedContent content;
cd = cs.newDecoder();
- decoder = new StreamDecoder(cd);
- cb = new ContentBuilderUCS2();
- decoder.setDecodeHandler(cb);
- bdata = byteArray("0041:d83d:dc11:0042");
+ decoder = new DecodeNotifier(cd);
+ cb = new ContentBuilder();
+ decoder.setCharDecodeListener(cb);
+ bdata = Bseq.byteArray("0041:d83d:dc11:0042");
is = new ByteArrayInputStream(bdata);
decoder.decode(is);
content = cb.getContent();
return;
}
+ /**
+ * Test of a UTF-8 encoded supplementary-plane character.
+ *
+ * <p>Feeds the four bytes F0 9F 90 91 (U+1F411 SHEEP) through a
+ * DecodeNotifier that has a ContentBuilder registered as its
+ * char-decode listener, then checks that the text collected by the
+ * builder is the surrogate pair U+D83D U+DC11.
+ *
+ * @throws IOException declared by this test; NOTE(review): presumably
+ * propagated from the decode call - confirm
+ * @throws DecodeBreakException declared by this test; NOTE(review):
+ * presumably propagated from the decode call - confirm
+ */
+ @Test
+ public void testSheep() throws IOException, DecodeBreakException {
+ System.out.println("sheep");
+
+ Charset cs;
+ CharsetDecoder decoder;
+ ContentBuilder listener;
+
+ DecodeNotifier sd;
+ InputStream is;
+
+ cs = Charset.forName("UTF-8");
+ decoder = cs.newDecoder();
+
+ sd = new DecodeNotifier(decoder);
+
+ listener = new ContentBuilder();
+ sd.setCharDecodeListener(listener);
+
+ // SMP character U+1F411 [SHEEP]
+ // see https://ja.osdn.net/projects/jindolf/ticket/36356
+ is = Bseq.byteStream(0xf0, 0x9f, 0x90, 0x91);
+ sd.decode(is);
+ // A code point above U+FFFF is expected back as a high/low surrogate pair.
+ assertEquals("\ud83d\udc11", listener.getContent().toString());
+
+ return;
+ }
+
}