X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=blobdiff_plain;f=nkf_test.pl;h=5a7a5cb3e1016d1bfd2e811feabc804159f1d6d9;hp=b60a55b71e7a15f40ddffeae1c8480ed77099b45;hb=88b848edfbddd42ebf2be54d3615c35402606726;hpb=57abf4f9f66e7bb3f8b00310d05a98f97d2af7a3 diff --git a/nkf_test.pl b/nkf_test.pl index b60a55b..5a7a5cb 100644 --- a/nkf_test.pl +++ b/nkf_test.pl @@ -2,7 +2,7 @@ # # nkf test program for nkf-2 # -# $Id: nkf_test.pl,v 1.25 2007/11/06 12:10:48 naruse Exp $ +# $Id: nkf_test.pl,v 1.31 2008/11/18 21:43:19 naruse Exp $ # # Shinji KONO # Sun Aug 18 12:25:40 JST 1996 @@ -186,7 +186,7 @@ print "UTF8 to U16L0..";&test("$nkf --ic=utf-8 --oc=utf-16le", $example{'utf8N' print "UTF8 to U16B...";&test("$nkf --ic=utf-8 --oc=utf-16be-bom", $example{'utf8N'},$example{'u16B'}); print "UTF8 to U16B0..";&test("$nkf --ic=utf-8 --oc=utf-16be", $example{'utf8N'},$example{'u16B0'}); - +print "UTF8 to UTF8...";&test("$nkf -w","\xf0\xa0\x80\x8b","\xf0\xa0\x80\x8b"); # From JIS @@ -216,8 +216,37 @@ print "UTF8 to SJIS...";&test("$nkf -s",$example{'utf1'},$example{'sjis1'}); print "UTF8 to EUC ...";&test("$nkf -e",$example{'utf1'},$example{'euc1'}); print "UTF8 to UTF8...";&test("$nkf -w",$example{'utf1'},$example{'utf1'}); -print "\nOther Features\n\n"; +# UTF +sub h {pack("H*",shift)} +print "SJIS to -w... ";&test("$nkf -w",h("82A0"),h("E38182")); +print "SJIS to -w8... ";&test("$nkf -w8",h("82A0"),h("EFBBBFE38182")); +print "SJIS to -w80... ";&test("$nkf -w80",h("82A0"),h("E38182")); +print "SJIS to UTF-8... ";&test("$nkf --oc=UTF-8",h("82A0"),h("E38182")); +print "SJIS to UTF-8N... ";&test("$nkf --oc=UTF-8N",h("82A0"),h("E38182")); +print "SJIS to UTF-8-BOM... ";&test("$nkf --oc=UTF-8-BOM",h("82A0"),h("EFBBBFE38182")); +print "SJIS to -w16... ";&test("$nkf -w16",h("82A0"),h("FEFF3042")); +print "SJIS to UTF-16... ";&test("$nkf --oc=UTF-16",h("82A0"),h("FEFF3042")); +print "SJIS to -w16B... ";&test("$nkf -w16B",h("82A0"),h("FEFF3042")); +print "SJIS to -w16B0... ";&test("$nkf -w16B0",h("82A0"),h("3042")); +print "SJIS to UTF-16BE... ";&test("$nkf --oc=UTF-16BE",h("82A0"),h("3042")); +print "SJIS to UTF-16BE-BOM...";&test("$nkf --oc=UTF-16BE-BOM",h("82A0"),h("FEFF3042")); +print "SJIS to -w16L... ";&test("$nkf -w16L",h("82A0"),h("FFFE4230")); +print "SJIS to -w16L0... ";&test("$nkf -w16L0",h("82A0"),h("4230")); +print "SJIS to UTF-16LE... ";&test("$nkf --oc=UTF-16LE",h("82A0"),h("4230")); +print "SJIS to UTF-16LE-BOM...";&test("$nkf --oc=UTF-16LE-BOM",h("82A0"),h("FFFE4230")); +print "SJIS to -w32... ";&test("$nkf -w32",h("82A0"),h("0000FEFF00003042")); +print "SJIS to UTF-32... ";&test("$nkf --oc=UTF-32",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32B... ";&test("$nkf -w32B",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32B0... ";&test("$nkf -w32B0",h("82A0"),h("00003042")); +print "SJIS to UTF-32BE... ";&test("$nkf --oc=UTF-32BE",h("82A0"),h("00003042")); +print "SJIS to UTF-32BE-BOM...";&test("$nkf --oc=UTF-32BE-BOM",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32L... ";&test("$nkf -w32L",h("82A0"),h("FFFE000042300000")); +print "SJIS to -w32L0... ";&test("$nkf -w32L0",h("82A0"),h("42300000")); +print "SJIS to UTF-32LE... ";&test("$nkf --oc=UTF-32LE",h("82A0"),h("42300000")); +print "SJIS to UTF-32LE-BOM...";&test("$nkf --oc=UTF-32LE-BOM",h("82A0"),h("FFFE000042300000")); + +print "\nOther Features\n\n"; # Ambigous Case $example{'amb'} = unpack('u',<<'eofeof'); @@ -260,6 +289,13 @@ printf "%-40s", "Ambiguous Case."; printf "%-40s", "SJIS Input assumption"; &test("$nkf -jSx",$example{'amb'},$example{'amb.sjis'}); +# UTF8_STR_OF_JIS_SECOND_LEVEL_KANJI +$example{'utf8_str_of_jis_second_level_kanji'} = "\xe9\xa4\x83\xe5\xad\x90"; + +printf "%-40s", "UTF8_STR_OF_JIS_SECOND_LEVEL_KANJI"; + &test("$nkf -w",$example{'utf8_str_of_jis_second_level_kanji'}, + $example{'utf8_str_of_jis_second_level_kanji'}); + # Broken JIS printf "%-40s", "Broken JIS"; @@ -287,7 +323,7 @@ printf "%-40s", "test_data/cp51932"; # test_data/cp932inv printf "%-40s", "test_data/cp932inv"; - &test("$nkf -sE --cp932inv",$example{'test_data/cp932.ans'},$example{'test_data/cp932'}); + &test("$nkf -sE --cp932",$example{'test_data/cp932.ans'},$example{'test_data/cp932'}); # test_data/no-cp932inv @@ -297,22 +333,6 @@ eofeof printf "%-40s", "test_data/no-cp932inv"; &test("$nkf -sE --no-cp932",$example{'test_data/cp932.ans'},$example{'test_data/no-cp932inv.ans'}); -# ISO-2022-JP-MS -$example{'iso2022jpms_sjis'} = "\xA1\xDF\xF0\xA0\xF4\xFC\xF5\x40\xF9\xFC"; -$example{'iso2022jpms_euc'} = "\x8e\xa1\x8e\xdf\xf6\xa2\xfe\xfe\x8f\xf5\xa1\x8f\xfe\xfe"; -$example{'iso2022jpms_jis'} = "\x1b\x28\x49\x21\x5f\x1b\x24\x28\x3f\x22\x22\x2a\x7e\x2b\x21\x34\x7e\x1b\x28\x42"; - -# printf "%-40s", "ISO-2022-JP-MS to CP932"; -# &test("$nkf --ic=ISO-2022-JP-MS --oc=CP932",$example{'iso2022jpms_jis'},$example{'iso2022jpms_sjis'}); -# -# printf "%-40s", "CP932 to ISO-2022-JP-MS"; -# &test("$nkf --ic=CP932 --oc=ISO-2022-JP-MS",$example{'iso2022jpms_sjis'},$example{'iso2022jpms_jis'}); -# -# printf "%-40s", "ISO-2022-JP-MS to eucJP-ms"; -# &test("$nkf --ic=ISO-2022-JP-MS --oc=eucJP-ms",$example{'iso2022jpms_jis'},$example{'iso2022jpms_euc'}); -# -# printf "%-40s", "eucJP-ms to ISO-2022-JP-MS"; -# &test("$nkf --ic=eucJP-ms --oc=ISO-2022-JP-MS",$example{'iso2022jpms_euc'},$example{'iso2022jpms_jis'}); # JIS X 0212 $example{'jisx0212_euc'} = "\x8F\xA2\xAF\x8F\xED\xE3"; $example{'jisx0212_jis'} = "\x1b\x24\x28\x44\x22\x2f\x6d\x63\x1b\x28\x42"; @@ -332,9 +352,12 @@ $example{'jisx0213_euc'} = unpack('u',<<'eofeof'); 2HJ^O_<_5S_WTJ/[YCZ&AC_[V eofeof -$example{'jisx0213_jis'} = unpack('u',<<'eofeof'); +$example{'jisx0213_jis2000'} = unpack('u',<<'eofeof'); ;&R0H3R(O+WU/54]]="A^>1LD*%`A(7YV&RA" eofeof +$example{'jisx0213_jis2004'} = unpack('u',<<'eofeof'); +;&R0H42(O+WU/54]]="A^>1LD*%`A(7YV&RA" +eofeof printf "%-40s", "Shift_JISX0213 to EUC-JISX0213"; &test("$nkf --ic=Shift_JISX0213 --oc=EUC-JISX0213",$example{'jisx0213_sjis'},$example{'jisx0213_euc'}); @@ -343,10 +366,13 @@ printf "%-40s", "EUC-JISX0213 to Shift_JISX0213"; &test("$nkf --ic=EUC-JISX0213 --oc=Shift_JISX0213",$example{'jisx0213_euc'},$example{'jisx0213_sjis'}); printf "%-40s", "ISO-2022-JP-3 to EUC-JISX0213"; - &test("$nkf --ic=ISO-2022-JP-3 --oc=EUC-JISX0213",$example{'jisx0213_jis'},$example{'jisx0213_euc'}); + &test("$nkf --ic=ISO-2022-JP-3 --oc=EUC-JISX0213",$example{'jisx0213_jis2000'},$example{'jisx0213_euc'}); + +printf "%-40s", "ISO-2022-JP-2004 to EUC-JISX0213"; + &test("$nkf --ic=ISO-2022-JP-2004 --oc=EUC-JISX0213",$example{'jisx0213_jis2004'},$example{'jisx0213_euc'}); -printf "%-40s", "EUC-JISX0213 to ISO-2022-JP-3"; - &test("$nkf --ic=EUC-JISX0213 --oc=ISO-2022-JP-3",$example{'jisx0213_euc'},$example{'jisx0213_jis'}); +printf "%-40s", "EUC-JISX0213 to ISO-2022-JP-2004"; + &test("$nkf --ic=EUC-JISX0213 --oc=ISO-2022-JP-2004",$example{'jisx0213_euc'},$example{'jisx0213_jis2004'}); # test_data/no_best_fit_chars $example{'test_data/no_best_fit_chars'} = unpack('u',<<'eofeof'); @@ -395,17 +421,17 @@ $example{'ms_ucs_map_1_utf16'} = "\x30\x1C\x20\x16\x22\x12\x00\xA2\x00\xA3\x00\x $example{'ms_ucs_map_1_utf16_ms'} = "\xFF\x5E\x22\x25\xFF\x0D\xFF\xE0\xFF\xE1\xFF\xE2"; printf "%-40s", "Normal UCS Mapping :"; - &test("$nkf -w16B0 -S",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16'}); + &test("$nkf -w16B0 --ic=Shift_JIS",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16'}); printf "%-40s", "Microsoft UCS Mapping :"; &test("$nkf -w16B0 -S --ms-ucs-map",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16_ms'}); printf "%-40s", "CP932 to UTF-16BE :"; &test("$nkf --ic=cp932 --oc=utf-16be",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16_ms'}); -# X0201 $B2>L>(B +# X0201 仮名 # X0201->X0208 conversion # X0208 aphabet -> ASCII -# X0201 $BAj8_JQ49(B +# X0201 相互変換 print "\nX0201 test\n\n"; @@ -476,6 +502,24 @@ printf "%-40s", "X0201 conversion: EUC"; &test("$nkf -jZ",$example{'x0201.euc'},$example{'x0201.x0208'}); printf "%-40s", "X0201 conversion: UTF8"; &test("$nkf -jZ",$example{'x0201.utf'},$example{'x0201.x0208'}); +printf "%-40s", "-wZ"; &test("$nkf -wZ", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ0"; &test("$nkf -wZ0", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ1"; &test("$nkf -wZ1", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ2"; &test("$nkf -wZ2", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ3"; &test("$nkf -wZ3", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ4"; &test("$nkf -wZ4", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xEF\xBD\xB1"); # -x means X0201 output printf "%-40s", "X0201 output: SJIS"; &test("$nkf -xs",$example{'x0201.euc'},$example{'x0201.sjis'}); @@ -599,7 +643,7 @@ eofeof # Without -l, ISO-8859-1 was handled as X0201. printf "%-40s", "MIME ISO-8859-1 (Q)"; - &test("$nkf -ml",$example{'mime.is8859'},$example{'mime.is8859.ans'}); + &test("$nkf -jml",$example{'mime.is8859'},$example{'mime.is8859.ans'}); # test for -f is not so simple. @@ -875,7 +919,7 @@ $example{'test_data/slash.ans'} = unpack('u',<<'eofeof'); eofeof printf "%-40s", "test_data/slash"; - &test("$nkf ",$example{'test_data/slash'},$example{'test_data/slash.ans'}); + &test("$nkf -j",$example{'test_data/slash'},$example{'test_data/slash.ans'}); # test_data/z1space-0 $example{'test_data/z1space-0'} = unpack('u',<<'eofeof'); @@ -960,99 +1004,109 @@ eofeof printf "%-40s", "test_data/bugs10904"; &test("$nkf -Mj",$example{'test_data/bugs10904'},$example{'test_data/bugs10904.ans'}); +printf "%-40s", "test_data/ruby-dev:39722"; + &test("$nkf -Mj",<