X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=nkf_test.pl;h=91a9af882d710f6804bebf57ef3f73399bb8aafd;hb=9768248e615c4a0f86fa629ce811a0872bfd35d7;hp=4e79f6f68e3d575bc33ea72785bb579c0dd7ff49;hpb=4f2ea7528baf174776adf1363b03bdf7cfa5b5ba;p=nkf%2Fnkf.git diff --git a/nkf_test.pl b/nkf_test.pl index 4e79f6f..91a9af8 100644 --- a/nkf_test.pl +++ b/nkf_test.pl @@ -2,8 +2,6 @@ # # nkf test program for nkf-2 # -# $Id: nkf_test.pl,v 1.29 2008/02/07 19:25:29 naruse Exp $ -# # Shinji KONO # Sun Aug 18 12:25:40 JST 1996 # Sun Nov 8 00:16:06 JST 1998 @@ -186,7 +184,7 @@ print "UTF8 to U16L0..";&test("$nkf --ic=utf-8 --oc=utf-16le", $example{'utf8N' print "UTF8 to U16B...";&test("$nkf --ic=utf-8 --oc=utf-16be-bom", $example{'utf8N'},$example{'u16B'}); print "UTF8 to U16B0..";&test("$nkf --ic=utf-8 --oc=utf-16be", $example{'utf8N'},$example{'u16B0'}); - +print "UTF8 to UTF8...";&test("$nkf -w","\xf0\xa0\x80\x8b","\xf0\xa0\x80\x8b"); # From JIS @@ -216,8 +214,37 @@ print "UTF8 to SJIS...";&test("$nkf -s",$example{'utf1'},$example{'sjis1'}); print "UTF8 to EUC ...";&test("$nkf -e",$example{'utf1'},$example{'euc1'}); print "UTF8 to UTF8...";&test("$nkf -w",$example{'utf1'},$example{'utf1'}); -print "\nOther Features\n\n"; +# UTF +sub h {pack("H*",shift)} +print "SJIS to -w... ";&test("$nkf -w",h("82A0"),h("E38182")); +print "SJIS to -w8... ";&test("$nkf -w8",h("82A0"),h("EFBBBFE38182")); +print "SJIS to -w80... ";&test("$nkf -w80",h("82A0"),h("E38182")); +print "SJIS to UTF-8... ";&test("$nkf --oc=UTF-8",h("82A0"),h("E38182")); +print "SJIS to UTF-8N... ";&test("$nkf --oc=UTF-8N",h("82A0"),h("E38182")); +print "SJIS to UTF-8-BOM... ";&test("$nkf --oc=UTF-8-BOM",h("82A0"),h("EFBBBFE38182")); +print "SJIS to -w16... ";&test("$nkf -w16",h("82A0"),h("FEFF3042")); +print "SJIS to UTF-16... ";&test("$nkf --oc=UTF-16",h("82A0"),h("FEFF3042")); +print "SJIS to -w16B... ";&test("$nkf -w16B",h("82A0"),h("FEFF3042")); +print "SJIS to -w16B0... ";&test("$nkf -w16B0",h("82A0"),h("3042")); +print "SJIS to UTF-16BE... ";&test("$nkf --oc=UTF-16BE",h("82A0"),h("3042")); +print "SJIS to UTF-16BE-BOM...";&test("$nkf --oc=UTF-16BE-BOM",h("82A0"),h("FEFF3042")); +print "SJIS to -w16L... ";&test("$nkf -w16L",h("82A0"),h("FFFE4230")); +print "SJIS to -w16L0... ";&test("$nkf -w16L0",h("82A0"),h("4230")); +print "SJIS to UTF-16LE... ";&test("$nkf --oc=UTF-16LE",h("82A0"),h("4230")); +print "SJIS to UTF-16LE-BOM...";&test("$nkf --oc=UTF-16LE-BOM",h("82A0"),h("FFFE4230")); +print "SJIS to -w32... ";&test("$nkf -w32",h("82A0"),h("0000FEFF00003042")); +print "SJIS to UTF-32... ";&test("$nkf --oc=UTF-32",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32B... ";&test("$nkf -w32B",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32B0... ";&test("$nkf -w32B0",h("82A0"),h("00003042")); +print "SJIS to UTF-32BE... ";&test("$nkf --oc=UTF-32BE",h("82A0"),h("00003042")); +print "SJIS to UTF-32BE-BOM...";&test("$nkf --oc=UTF-32BE-BOM",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32L... ";&test("$nkf -w32L",h("82A0"),h("FFFE000042300000")); +print "SJIS to -w32L0... ";&test("$nkf -w32L0",h("82A0"),h("42300000")); +print "SJIS to UTF-32LE... ";&test("$nkf --oc=UTF-32LE",h("82A0"),h("42300000")); +print "SJIS to UTF-32LE-BOM...";&test("$nkf --oc=UTF-32LE-BOM",h("82A0"),h("FFFE000042300000")); + +print "\nOther Features\n\n"; # Ambigous Case $example{'amb'} = unpack('u',<<'eofeof'); @@ -260,6 +287,13 @@ printf "%-40s", "Ambiguous Case."; printf "%-40s", "SJIS Input assumption"; &test("$nkf -jSx",$example{'amb'},$example{'amb.sjis'}); +# UTF8_STR_OF_JIS_SECOND_LEVEL_KANJI +$example{'utf8_str_of_jis_second_level_kanji'} = "\xe9\xa4\x83\xe5\xad\x90"; + +printf "%-40s", "UTF8_STR_OF_JIS_SECOND_LEVEL_KANJI"; + &test("$nkf -w",$example{'utf8_str_of_jis_second_level_kanji'}, + $example{'utf8_str_of_jis_second_level_kanji'}); + # Broken JIS printf "%-40s", "Broken JIS"; @@ -392,10 +426,10 @@ printf "%-40s", "Microsoft UCS Mapping :"; printf "%-40s", "CP932 to UTF-16BE :"; &test("$nkf --ic=cp932 --oc=utf-16be",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16_ms'}); -# X0201 $B2>L>(B +# X0201 仮名 # X0201->X0208 conversion # X0208 aphabet -> ASCII -# X0201 $BAj8_JQ49(B +# X0201 相互変換 print "\nX0201 test\n\n"; @@ -466,6 +500,24 @@ printf "%-40s", "X0201 conversion: EUC"; &test("$nkf -jZ",$example{'x0201.euc'},$example{'x0201.x0208'}); printf "%-40s", "X0201 conversion: UTF8"; &test("$nkf -jZ",$example{'x0201.utf'},$example{'x0201.x0208'}); +printf "%-40s", "-wZ"; &test("$nkf -wZ", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ0"; &test("$nkf -wZ0", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ1"; &test("$nkf -wZ1", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ2"; &test("$nkf -wZ2", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ3"; &test("$nkf -wZ3", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ4"; &test("$nkf -wZ4", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xEF\xBD\xB1"); # -x means X0201 output printf "%-40s", "X0201 output: SJIS"; &test("$nkf -xs",$example{'x0201.euc'},$example{'x0201.sjis'}); @@ -950,6 +1002,54 @@ eofeof printf "%-40s", "test_data/bugs10904"; &test("$nkf -Mj",$example{'test_data/bugs10904'},$example{'test_data/bugs10904.ans'}); +printf "%-40s", "test_data/ruby-dev:39722"; + &test("$nkf -Mj",<5)#6#)98DM%23T_/0H` +eofeof + +printf "%-40s", "test_data/bug19779 "; + &test("$nkf -jM",$example{'test_data/bug19779'},$example{'test_data/bug19779.ans'}); + +printf "%-40s", "[nkf-forum:47327] "; + &test("$nkf -wM",pack('H*','feffd852de76d814dc45000a'),"=?UTF-8?B?8KSptvCVgYU=?=\n"); + +printf "%-40s", "[nkf-forum:47334] "; + &test("$nkf -w",pack('H*','feff006100620063000a'),"abc\n"); + +printf "%-40s", "[nkf-bug:20079] "; + &test("$nkf -jSxM","\xBB \xBB","=?ISO-2022-JP?B?GyhJOxsoQiAbKEk7GyhC?="); + +printf "%-40s", "[nkf-forum:48850] "; + &test("$nkf -jSM", + "From: \x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0" . + " \n", + "From: =?ISO-2022-JP?B?GyRCJCIkIiQiJCIkIiQiJCIkIiQiGyhC?=" . + " \n"); + +printf "%-40s", "[nkf-bug:21393]-x "; + &test("$nkf --ic=UTF-8 --oc=CP932", + "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1", + "\xBC\xDE\xAC\xB0\xCF\xDD\xA5\xCE\xDF\xC3\xC4\xA1"); + +printf "%-40s", "[nkf-bug:21393]-X "; + &test("$nkf --ic=UTF-8 --oc=CP932 -X", + "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1", + "\x83W\x83\x83\x81[\x83}\x83\x93\x81E\x83|\x83e\x83g\x81B"); + + if (!NKF) { printf "%-40s", "Guess NL"; &command_tests( "$nkf --guess","none", "ASCII\n", @@ -973,6 +1073,7 @@ printf "%-40s", "Guess NL"; "$nkf --guess","\r\n.\n", "ASCII (MIXED NL)\n", "$nkf --guess","\r\n.\r", "ASCII (MIXED NL)\n", "$nkf --guess","\r\n.\r\n", "ASCII (CRLF)\n"); + } printf "%-40s", "Convert NL to LF"; &command_tests(