X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=nkf_test.pl;h=237fb329e7dacae2cb437c82beb2dfea57527c1c;hb=a8bce81d92c1158c4baeeb2ae8b9c86aff9a12f5;hp=dd7a286c6f71b059f8e7f49fbd4620fc8a5f11a0;hpb=1979aa560aa415d0ec4e6e1193f3f25432852d1e;p=nkf%2Fnkf.git diff --git a/nkf_test.pl b/nkf_test.pl index dd7a286..237fb32 100644 --- a/nkf_test.pl +++ b/nkf_test.pl @@ -2,7 +2,7 @@ # # nkf test program for nkf-2 # -# $Id: nkf_test.pl,v 1.8 2004/11/19 15:27:39 naruse Exp $ +# $Id: nkf_test.pl,v 1.31 2008/11/18 21:43:19 naruse Exp $ # # Shinji KONO # Sun Aug 18 12:25:40 JST 1996 @@ -120,74 +120,133 @@ eofeof # From JIS -print "JIS to JIS ... ";&test("$nkf -j",$example{'jis'},$example{'jis'}); -print "JIS to SJIS... ";&test("$nkf -s",$example{'jis'},$example{'sjis'}); -print "JIS to EUC ... ";&test("$nkf -e",$example{'jis'},$example{'euc'}); -print "JIS to UTF8... ";&test("$nkf -w",$example{'jis'},$example{'utf8N'}); -print "JIS to U16L... ";&test("$nkf -w16L",$example{'jis'},$example{'u16L'}); -print "JIS to U16B... ";&test("$nkf -w16B",$example{'jis'},$example{'u16B'}); +print "JIS to JIS ...";&test("$nkf -j",$example{'jis'},$example{'jis'}); +print "JIS to SJIS...";&test("$nkf -s",$example{'jis'},$example{'sjis'}); +print "JIS to EUC ...";&test("$nkf -e",$example{'jis'},$example{'euc'}); +print "JIS to UTF8...";&test("$nkf -w",$example{'jis'},$example{'utf8N'}); +print "JIS to U16L...";&test("$nkf -w16L",$example{'jis'},$example{'u16L'}); +print "JIS to U16B...";&test("$nkf -w16B",$example{'jis'},$example{'u16B'}); +print "JIS to JIS ...";&test("$nkf --ic=iso-2022-jp --oc=iso-2022-jp" ,$example{'jis'},$example{'jis'}); +print "JIS to SJIS...";&test("$nkf --ic=iso-2022-jp --oc=shift_jis" ,$example{'jis'},$example{'sjis'}); +print "JIS to EUC ...";&test("$nkf --ic=iso-2022-jp --oc=euc-jp" ,$example{'jis'},$example{'euc'}); +print "JIS to UTF8...";&test("$nkf --ic=iso-2022-jp --oc=utf-8n" ,$example{'jis'},$example{'utf8N'}); +print "JIS to U16L...";&test("$nkf --ic=iso-2022-jp --oc=utf-16le-bom",$example{'jis'},$example{'u16L'}); +print "JIS to U16B...";&test("$nkf --ic=iso-2022-jp --oc=utf-16be-bom",$example{'jis'},$example{'u16B'}); # From SJIS -print "SJIS to JIS ... ";&test("$nkf -j",$example{'sjis'},$example{'jis'}); -print "SJIS to SJIS... ";&test("$nkf -s",$example{'sjis'},$example{'sjis'}); -print "SJIS to EUC ... ";&test("$nkf -e",$example{'sjis'},$example{'euc'}); -print "SJIS to UTF8... ";&test("$nkf -w",$example{'sjis'},$example{'utf8N'}); -print "SJIS to U16L... ";&test("$nkf -w16L",$example{'sjis'},$example{'u16L'}); -print "SJIS to U16B... ";&test("$nkf -w16B",$example{'sjis'},$example{'u16B'}); +print "SJIS to JIS ...";&test("$nkf -j",$example{'sjis'},$example{'jis'}); +print "SJIS to SJIS...";&test("$nkf -s",$example{'sjis'},$example{'sjis'}); +print "SJIS to EUC ...";&test("$nkf -e",$example{'sjis'},$example{'euc'}); +print "SJIS to UTF8...";&test("$nkf -w",$example{'sjis'},$example{'utf8N'}); +print "SJIS to U16L...";&test("$nkf -w16L",$example{'sjis'},$example{'u16L'}); +print "SJIS to U16B...";&test("$nkf -w16B",$example{'sjis'},$example{'u16B'}); +print "SJIS to JIS ...";&test("$nkf --ic=shift_jis --oc=iso-2022-jp" ,$example{'sjis'},$example{'jis'}); +print "SJIS to SJIS...";&test("$nkf --ic=shift_jis --oc=shift_jis" ,$example{'sjis'},$example{'sjis'}); +print "SJIS to EUC ...";&test("$nkf --ic=shift_jis --oc=euc-jp" ,$example{'sjis'},$example{'euc'}); +print "SJIS to UTF8...";&test("$nkf --ic=shift_jis --oc=utf-8n" ,$example{'sjis'},$example{'utf8N'}); +print "SJIS to U16L...";&test("$nkf --ic=shift_jis --oc=utf-16le-bom" ,$example{'sjis'},$example{'u16L'}); +print "SJIS to U16B...";&test("$nkf --ic=shift_jis --oc=utf-16be-bom" ,$example{'sjis'},$example{'u16B'}); # From EUC -print "EUC to JIS ... ";&test("$nkf -j",$example{'euc'},$example{'jis'}); -print "EUC to SJIS... ";&test("$nkf -s",$example{'euc'},$example{'sjis'}); -print "EUC to EUC ... ";&test("$nkf -e",$example{'euc'},$example{'euc'}); -print "EUC to UTF8... ";&test("$nkf -w",$example{'euc'},$example{'utf8N'}); -print "EUC to U16L... ";&test("$nkf -w16L",$example{'euc'},$example{'u16L'}); -print "EUC to U16B... ";&test("$nkf -w16B",$example{'euc'},$example{'u16B'}); +print "EUC to JIS ...";&test("$nkf -j",$example{'euc'},$example{'jis'}); +print "EUC to SJIS...";&test("$nkf -s",$example{'euc'},$example{'sjis'}); +print "EUC to EUC ...";&test("$nkf -e",$example{'euc'},$example{'euc'}); +print "EUC to UTF8...";&test("$nkf -w",$example{'euc'},$example{'utf8N'}); +print "EUC to U16L...";&test("$nkf -w16L",$example{'euc'},$example{'u16L'}); +print "EUC to U16B...";&test("$nkf -w16B",$example{'euc'},$example{'u16B'}); +print "EUC to JIS ...";&test("$nkf --ic=euc-jp --oc=iso-2022-jp" ,$example{'euc'},$example{'jis'}); +print "EUC to SJIS...";&test("$nkf --ic=euc-jp --oc=shift_jis" ,$example{'euc'},$example{'sjis'}); +print "EUC to EUC ...";&test("$nkf --ic=euc-jp --oc=euc-jp" ,$example{'euc'},$example{'euc'}); +print "EUC to UTF8...";&test("$nkf --ic=euc-jp --oc=utf-8n" ,$example{'euc'},$example{'utf8N'}); +print "EUC to U16L...";&test("$nkf --ic=euc-jp --oc=utf-16le-bom" ,$example{'euc'},$example{'u16L'}); +print "EUC to U16B...";&test("$nkf --ic=euc-jp --oc=utf-16be-bom" ,$example{'euc'},$example{'u16B'}); # From UTF8 -print "UTF8 to JIS ... ";&test("$nkf -j", $example{'utf8N'},$example{'jis'}); -print "UTF8 to SJIS... ";&test("$nkf -s", $example{'utf8N'},$example{'sjis'}); -print "UTF8 to EUC ... ";&test("$nkf -e", $example{'utf8N'},$example{'euc'}); -print "UTF8 to UTF8N.. ";&test("$nkf -w", $example{'utf8N'},$example{'utf8N'}); -print "UTF8 to UTF8... ";&test("$nkf -w8", $example{'utf8N'},$example{'utf8'}); -print "UTF8 to UTF8N.. ";&test("$nkf -w80", $example{'utf8N'},$example{'utf8N'}); -print "UTF8 to U16L... ";&test("$nkf -w16L", $example{'utf8N'},$example{'u16L'}); -print "UTF8 to U16L0.. ";&test("$nkf -w16L0", $example{'utf8N'},$example{'u16L0'}); -print "UTF8 to U16B... ";&test("$nkf -w16B", $example{'utf8N'},$example{'u16B'}); -print "UTF8 to U16B0.. ";&test("$nkf -w16B0", $example{'utf8N'},$example{'u16B0'}); - - +print "UTF8 to JIS ...";&test("$nkf -j", $example{'utf8N'},$example{'jis'}); +print "UTF8 to SJIS...";&test("$nkf -s", $example{'utf8N'},$example{'sjis'}); +print "UTF8 to EUC ...";&test("$nkf -e", $example{'utf8N'},$example{'euc'}); +print "UTF8 to UTF8N..";&test("$nkf -w", $example{'utf8N'},$example{'utf8N'}); +print "UTF8 to UTF8...";&test("$nkf -w8", $example{'utf8N'},$example{'utf8'}); +print "UTF8 to UTF8N..";&test("$nkf -w80", $example{'utf8N'},$example{'utf8N'}); +print "UTF8 to U16L...";&test("$nkf -w16L", $example{'utf8N'},$example{'u16L'}); +print "UTF8 to U16L0..";&test("$nkf -w16L0", $example{'utf8N'},$example{'u16L0'}); +print "UTF8 to U16B...";&test("$nkf -w16B", $example{'utf8N'},$example{'u16B'}); +print "UTF8 to U16B0..";&test("$nkf -w16B0", $example{'utf8N'},$example{'u16B0'}); +print "UTF8 to JIS ...";&test("$nkf --ic=utf-8 --oc=iso-2022-jp", $example{'utf8N'},$example{'jis'}); +print "UTF8 to SJIS...";&test("$nkf --ic=utf-8 --oc=shift_jis", $example{'utf8N'},$example{'sjis'}); +print "UTF8 to EUC ...";&test("$nkf --ic=utf-8 --oc=euc-jp", $example{'utf8N'},$example{'euc'}); +print "UTF8 to UTF8N..";&test("$nkf --ic=utf-8 --oc=utf-8", $example{'utf8N'},$example{'utf8N'}); +print "UTF8 to UTF8BOM";&test("$nkf --ic=utf-8 --oc=utf-8-bom", $example{'utf8N'},$example{'utf8'}); +print "UTF8 to UTF8N..";&test("$nkf --ic=utf-8 --oc=utf-8n", $example{'utf8N'},$example{'utf8N'}); +print "UTF8 to U16L...";&test("$nkf --ic=utf-8 --oc=utf-16le-bom", $example{'utf8N'},$example{'u16L'}); +print "UTF8 to U16L0..";&test("$nkf --ic=utf-8 --oc=utf-16le", $example{'utf8N'},$example{'u16L0'}); +print "UTF8 to U16B...";&test("$nkf --ic=utf-8 --oc=utf-16be-bom", $example{'utf8N'},$example{'u16B'}); +print "UTF8 to U16B0..";&test("$nkf --ic=utf-8 --oc=utf-16be", $example{'utf8N'},$example{'u16B0'}); + +print "UTF8 to UTF8...";&test("$nkf -w","\xf0\xa0\x80\x8b","\xf0\xa0\x80\x8b"); # From JIS -print "JIS to JIS ... ";&test("$nkf -j",$example{'jis1'},$example{'jis1'}); -print "JIS to SJIS... ";&test("$nkf -s",$example{'jis1'},$example{'sjis1'}); -print "JIS to EUC ... ";&test("$nkf -e",$example{'jis1'},$example{'euc1'}); -print "JIS to UTF8... ";&test("$nkf -w",$example{'jis1'},$example{'utf1'}); +print "JIS to JIS ...";&test("$nkf -j",$example{'jis1'},$example{'jis1'}); +print "JIS to SJIS...";&test("$nkf -s",$example{'jis1'},$example{'sjis1'}); +print "JIS to EUC ...";&test("$nkf -e",$example{'jis1'},$example{'euc1'}); +print "JIS to UTF8...";&test("$nkf -w",$example{'jis1'},$example{'utf1'}); # From SJIS -print "SJIS to JIS ... ";&test("$nkf -j",$example{'sjis1'},$example{'jis1'}); -print "SJIS to SJIS... ";&test("$nkf -s",$example{'sjis1'},$example{'sjis1'}); -print "SJIS to EUC ... ";&test("$nkf -e",$example{'sjis1'},$example{'euc1'}); -print "SJIS to UTF8... ";&test("$nkf -w",$example{'sjis1'},$example{'utf1'}); +print "SJIS to JIS ...";&test("$nkf -j",$example{'sjis1'},$example{'jis1'}); +print "SJIS to SJIS...";&test("$nkf -s",$example{'sjis1'},$example{'sjis1'}); +print "SJIS to EUC ...";&test("$nkf -e",$example{'sjis1'},$example{'euc1'}); +print "SJIS to UTF8...";&test("$nkf -w",$example{'sjis1'},$example{'utf1'}); # From EUC -print "EUC to JIS ... ";&test("$nkf -j",$example{'euc1'},$example{'jis1'}); -print "EUC to SJIS... ";&test("$nkf -s",$example{'euc1'},$example{'sjis1'}); -print "EUC to EUC ... ";&test("$nkf -e",$example{'euc1'},$example{'euc1'}); -print "EUC to UTF8... ";&test("$nkf -w",$example{'euc1'},$example{'utf1'}); +print "EUC to JIS ...";&test("$nkf -j",$example{'euc1'},$example{'jis1'}); +print "EUC to SJIS...";&test("$nkf -s",$example{'euc1'},$example{'sjis1'}); +print "EUC to EUC ...";&test("$nkf -e",$example{'euc1'},$example{'euc1'}); +print "EUC to UTF8...";&test("$nkf -w",$example{'euc1'},$example{'utf1'}); # From UTF8 -print "UTF8 to JIS ... ";&test("$nkf -j",$example{'utf1'},$example{'jis1'}); -print "UTF8 to SJIS... ";&test("$nkf -s",$example{'utf1'},$example{'sjis1'}); -print "UTF8 to EUC ... ";&test("$nkf -e",$example{'utf1'},$example{'euc1'}); -print "UTF8 to UTF8... ";&test("$nkf -w",$example{'utf1'},$example{'utf1'}); - +print "UTF8 to JIS ...";&test("$nkf -j",$example{'utf1'},$example{'jis1'}); +print "UTF8 to SJIS...";&test("$nkf -s",$example{'utf1'},$example{'sjis1'}); +print "UTF8 to EUC ...";&test("$nkf -e",$example{'utf1'},$example{'euc1'}); +print "UTF8 to UTF8...";&test("$nkf -w",$example{'utf1'},$example{'utf1'}); + +# UTF +sub h {pack("H*",shift)} +print "SJIS to -w... ";&test("$nkf -w",h("82A0"),h("E38182")); +print "SJIS to -w8... ";&test("$nkf -w8",h("82A0"),h("EFBBBFE38182")); +print "SJIS to -w80... ";&test("$nkf -w80",h("82A0"),h("E38182")); +print "SJIS to UTF-8... ";&test("$nkf --oc=UTF-8",h("82A0"),h("E38182")); +print "SJIS to UTF-8N... ";&test("$nkf --oc=UTF-8N",h("82A0"),h("E38182")); +print "SJIS to UTF-8-BOM... ";&test("$nkf --oc=UTF-8-BOM",h("82A0"),h("EFBBBFE38182")); +print "SJIS to -w16... ";&test("$nkf -w16",h("82A0"),h("FEFF3042")); +print "SJIS to UTF-16... ";&test("$nkf --oc=UTF-16",h("82A0"),h("FEFF3042")); +print "SJIS to -w16B... ";&test("$nkf -w16B",h("82A0"),h("FEFF3042")); +print "SJIS to -w16B0... ";&test("$nkf -w16B0",h("82A0"),h("3042")); +print "SJIS to UTF-16BE... ";&test("$nkf --oc=UTF-16BE",h("82A0"),h("3042")); +print "SJIS to UTF-16BE-BOM...";&test("$nkf --oc=UTF-16BE-BOM",h("82A0"),h("FEFF3042")); +print "SJIS to -w16L... ";&test("$nkf -w16L",h("82A0"),h("FFFE4230")); +print "SJIS to -w16L0... ";&test("$nkf -w16L0",h("82A0"),h("4230")); +print "SJIS to UTF-16LE... ";&test("$nkf --oc=UTF-16LE",h("82A0"),h("4230")); +print "SJIS to UTF-16LE-BOM...";&test("$nkf --oc=UTF-16LE-BOM",h("82A0"),h("FFFE4230")); +print "SJIS to -w32... ";&test("$nkf -w32",h("82A0"),h("0000FEFF00003042")); +print "SJIS to UTF-32... ";&test("$nkf --oc=UTF-32",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32B... ";&test("$nkf -w32B",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32B0... ";&test("$nkf -w32B0",h("82A0"),h("00003042")); +print "SJIS to UTF-32BE... ";&test("$nkf --oc=UTF-32BE",h("82A0"),h("00003042")); +print "SJIS to UTF-32BE-BOM...";&test("$nkf --oc=UTF-32BE-BOM",h("82A0"),h("0000FEFF00003042")); +print "SJIS to -w32L... ";&test("$nkf -w32L",h("82A0"),h("FFFE000042300000")); +print "SJIS to -w32L0... ";&test("$nkf -w32L0",h("82A0"),h("42300000")); +print "SJIS to UTF-32LE... ";&test("$nkf --oc=UTF-32LE",h("82A0"),h("42300000")); +print "SJIS to UTF-32LE-BOM...";&test("$nkf --oc=UTF-32LE-BOM",h("82A0"),h("FFFE000042300000")); + + +print "\nOther Features\n\n"; # Ambigous Case $example{'amb'} = unpack('u',<<'eofeof'); @@ -222,21 +281,28 @@ M)4(;*$(*&RA))4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q >)4(P,25",#$E0C`Q)4(P,25",#$E0C`Q)4(;*$(* eofeof -print "Ambiguous Case. "; +printf "%-40s", "Ambiguous Case."; &test("$nkf -j",$example{'amb'},$example{'amb.euc'}); # Input assumption -print "SJIS Input assumption "; +printf "%-40s", "SJIS Input assumption"; &test("$nkf -jSx",$example{'amb'},$example{'amb.sjis'}); +# UTF8_STR_OF_JIS_SECOND_LEVEL_KANJI +$example{'utf8_str_of_jis_second_level_kanji'} = "\xe9\xa4\x83\xe5\xad\x90"; + +printf "%-40s", "UTF8_STR_OF_JIS_SECOND_LEVEL_KANJI"; + &test("$nkf -w",$example{'utf8_str_of_jis_second_level_kanji'}, + $example{'utf8_str_of_jis_second_level_kanji'}); + # Broken JIS -print "Broken JIS "; +printf "%-40s", "Broken JIS"; $input = $example{'jis'}; $input =~ s/\033//g; &test("$nkf -Be",$input,$example{'euc'}); -print "Broken JIS is safe on Normal JIS? "; +printf "%-40s", "Broken JIS is safe on Normal JIS?"; $input = $example{'jis'}; &test("$nkf -Be",$input,$example{'euc'}); @@ -250,12 +316,14 @@ $example{'test_data/cp932.ans'} = unpack('u',<<'eofeof'); %_/$@_.X` eofeof -print "test_data/cp932 "; +printf "%-40s", "test_data/cp932"; &test("$nkf -eS",$example{'test_data/cp932'},$example{'test_data/cp932.ans'}); +printf "%-40s", "test_data/cp51932"; + &test("$nkf --ic=cp932 --oc=cp51932",$example{'test_data/cp932'},$example{'test_data/cp932.ans'}); # test_data/cp932inv -print "test_data/cp932inv "; - &test("$nkf -sE --cp932inv",$example{'test_data/cp932.ans'},$example{'test_data/cp932'}); +printf "%-40s", "test_data/cp932inv"; + &test("$nkf -sE --cp932",$example{'test_data/cp932.ans'},$example{'test_data/cp932'}); # test_data/no-cp932inv @@ -263,9 +331,73 @@ $example{'test_data/no-cp932inv.ans'} = unpack('u',<<'eofeof'); %[N\@[NP` eofeof -print "test_data/no-cp932inv "; - &test("$nkf -sE",$example{'test_data/cp932.ans'},$example{'test_data/no-cp932inv.ans'}); +printf "%-40s", "test_data/no-cp932inv"; + &test("$nkf -sE --no-cp932",$example{'test_data/cp932.ans'},$example{'test_data/no-cp932inv.ans'}); +# JIS X 0212 +$example{'jisx0212_euc'} = "\x8F\xA2\xAF\x8F\xED\xE3"; +$example{'jisx0212_jis'} = "\x1b\x24\x28\x44\x22\x2f\x6d\x63\x1b\x28\x42"; + +printf "%-40s", "ISO-2022-JP-1 to EUC-JP"; + &test("$nkf --ic=ISO-2022-JP-1 --oc=EUC-JP",$example{'jisx0212_jis'},$example{'jisx0212_euc'}); + +printf "%-40s", "EUC-JP to ISO-2022-JP-1"; + &test("$nkf --ic=EUC-JP --oc=ISO-2022-JP-1",$example{'jisx0212_euc'},$example{'jisx0212_jis'}); +# JIS X 0213 + +$example{'jisx0213_sjis'} = unpack('u',<<'eofeof'); +0@:V(G9ATF)WJIN_W\$#\]``` +eofeof + +$example{'jisx0213_euc'} = unpack('u',<<'eofeof'); +2HJ^O_<_5S_WTJ/[YCZ&AC_[V +eofeof + +$example{'jisx0213_jis2000'} = unpack('u',<<'eofeof'); +;&R0H3R(O+WU/54]]="A^>1LD*%`A(7YV&RA" +eofeof +$example{'jisx0213_jis2004'} = unpack('u',<<'eofeof'); +;&R0H42(O+WU/54]]="A^>1LD*%`A(7YV&RA" +eofeof + +printf "%-40s", "Shift_JISX0213 to EUC-JISX0213"; + &test("$nkf --ic=Shift_JISX0213 --oc=EUC-JISX0213",$example{'jisx0213_sjis'},$example{'jisx0213_euc'}); + +printf "%-40s", "EUC-JISX0213 to Shift_JISX0213"; + &test("$nkf --ic=EUC-JISX0213 --oc=Shift_JISX0213",$example{'jisx0213_euc'},$example{'jisx0213_sjis'}); + +printf "%-40s", "ISO-2022-JP-3 to EUC-JISX0213"; + &test("$nkf --ic=ISO-2022-JP-3 --oc=EUC-JISX0213",$example{'jisx0213_jis2000'},$example{'jisx0213_euc'}); + +printf "%-40s", "ISO-2022-JP-2004 to EUC-JISX0213"; + &test("$nkf --ic=ISO-2022-JP-2004 --oc=EUC-JISX0213",$example{'jisx0213_jis2004'},$example{'jisx0213_euc'}); + +printf "%-40s", "EUC-JISX0213 to ISO-2022-JP-2004"; + &test("$nkf --ic=EUC-JISX0213 --oc=ISO-2022-JP-2004",$example{'jisx0213_euc'},$example{'jisx0213_jis2004'}); +# test_data/no_best_fit_chars + +$example{'test_data/no_best_fit_chars'} = unpack('u',<<'eofeof'); +;XH"5XHBE[[R-[[^@[[^A[[^B[[^C[[^D[[^E +eofeof + +printf "%-40s", "no_best_fit_chars (eucJP-ascii)"; + &test("$nkf -W --oc=eucJP-ascii --no-best-fit-chars",$example{'test_data/no_best_fit_chars'},''); +$example{'test_data/no_best_fit_chars_ms'} = unpack('u',<<'eofeof'); +9PJ+"H\*EPJ;"K.*`E.*`EN*`ON*(DN.`G``` +eofeof + +printf "%-40s", "no_best_fit_chars (eucJP-ms)"; + &test("$nkf -W --oc=eucJP-ms --no-best-fit-chars",$example{'test_data/no_best_fit_chars_ms'},''); + +$example{'test_data/no_best_fit_chars_cp932'} = unpack('u',<<'eofeof'); +MPJ'"HL*CPJ7"IL*IPJK"J\*LPJW"KL*OPK+"L\*UPK?"N,*YPKK"N\.`PX'# +M@L.#PX3#A<.&PX?#B,.)PXK#B\.,PXW#CL./PY##D<.2PY/#E,.5PY;#F,.9 +MPYK#F\.L>(B +# X0201 仮名 # X0201->X0208 conversion # X0208 aphabet -> ASCII -# X0201 $BAj8_JQ49(B +# X0201 相互変換 print "\nX0201 test\n\n"; @@ -324,11 +456,11 @@ $example{'x0201.utf'} = unpack('u',<<'eofeof'); MY86HZ*>2XX*KXX*MXX*OXX*QXX*SXX*LXX*NXX*PXX*RXX*T"N6%J.B+L>^\ MH>^\HN^\H^^\I.^\I>^\IN^\I^^]@>^]@N^]@^^]A.^]A>^]AN^]APKEA:CH MJ)CEC[?OO('OO*#OO(/OO(3OO(7OO+[OO(;OO(KOO(COO(GBB)+OO(OOO)WO -MO+OOO+WOO9OOO9WOOZ4*Y8V*Z*>2[[VV[[VW[[VX/>^]N>^]NN^]MN^^GN^] -MM^^^GN^]N.^^GN^]N>^^GN^]NN^^GN.!J`KEC8KHIY+OOHKOOI_OOHOOOI_O -MOHSOOI_OOHWOOI_OOH[OOI_OO;?OOI_OO;'OOIX*[[Z*[[Z?[[Z+[[Z?[[Z, -M"FAA;FMA:W4@[[Z*[[Z?[[Z+[[Z?[[Z,[[VD"N^^BN^^G^^^B^^^G^^^C.^^ -2G>^]H0KEC8KHIY+C@:[EOHP* +MO+OOO+WOO9OOO9W"I0KEC8KHIY+OO;;OO;?OO;@][[VY[[VZ[[VV[[Z>[[VW +M[[Z>[[VX[[Z>[[VY[[Z>[[VZ[[Z>XX&H"N6-BNBGDN^^BN^^G^^^B^^^G^^^ +MC.^^G^^^C>^^G^^^CN^^G^^]M^^^G^^]L>^^G@KOOHKOOI_OOHOOOI_OOHP* +M:&%N:V%K=2#OOHKOOI_OOHOOOI_OOHSOO:0*[[Z*[[Z?[[Z+[[Z?[[Z,[[Z= +1[[VA"N6-BNBGDN.!KN6^C`H` eofeof $example{'x0201.jis'} = unpack('u',<<'eofeof'); @@ -360,24 +492,42 @@ eofeof # -X is necessary to allow X0201 in SJIS # -Z convert X0208 alphabet to ASCII -print "X0201 conversion: SJIS "; +printf "%-40s", "X0201 conversion: SJIS"; &test("$nkf -jXZ",$example{'x0201.sjis'},$example{'x0201.x0208'}); -print "X0201 conversion: JIS "; +printf "%-40s", "X0201 conversion: JIS"; &test("$nkf -jZ",$example{'x0201.jis'},$example{'x0201.x0208'}); -print "X0201 conversion:SI/SO "; +printf "%-40s", "X0201 conversion: SI/SO"; &test("$nkf -jZ",$example{'x0201.sosi'},$example{'x0201.x0208'}); -print "X0201 conversion: EUC "; +printf "%-40s", "X0201 conversion: EUC"; &test("$nkf -jZ",$example{'x0201.euc'},$example{'x0201.x0208'}); -print "X0201 conversion: UTF8 "; +printf "%-40s", "X0201 conversion: UTF8"; &test("$nkf -jZ",$example{'x0201.utf'},$example{'x0201.x0208'}); +printf "%-40s", "-wZ"; &test("$nkf -wZ", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ0"; &test("$nkf -wZ0", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ1"; &test("$nkf -wZ1", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ2"; &test("$nkf -wZ2", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ3"; &test("$nkf -wZ3", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ4"; &test("$nkf -wZ4", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xEF\xBD\xB1"); # -x means X0201 output -print "X0201 output: SJIS "; +printf "%-40s", "X0201 output: SJIS"; &test("$nkf -xs",$example{'x0201.euc'},$example{'x0201.sjis'}); -print "X0201 output: JIS "; +printf "%-40s", "X0201 output: JIS"; &test("$nkf -xj",$example{'x0201.sjis'},$example{'x0201.jis'}); -print "X0201 output: EUC "; +printf "%-40s", "X0201 output: EUC"; &test("$nkf -xe",$example{'x0201.jis'},$example{'x0201.euc'}); -print "X0201 output: UTF8 "; +printf "%-40s", "X0201 output: UTF8"; &test("$nkf -xw",$example{'x0201.jis'},$example{'x0201.utf'}); # MIME decode @@ -445,8 +595,8 @@ M(TB1./RD[=ALH0@I"tmp1");print OUT pack('u',$tmp);close(OUT); + # open(OUT,">tmp1");printf "%-40s", OUT pack('u',$tmp);close(OUT); # unbuf mode implies more pessimistic decode -print "MIME decode (unbuf) "; +printf "%-40s", "MIME decode (unbuf)"; $tmp = &test("$nkf -jmNu",$example{'mime.iso2022'},$example{'mime.unbuf'},$example{'mime.unbuf.alt'}); - # open(OUT,">tmp2");print OUT pack('u',$tmp);close(OUT); -print "MIME decode (base64) "; + # open(OUT,">tmp2");printf "%-40s", OUT pack('u',$tmp);close(OUT); +printf "%-40s", "MIME decode (base64)"; &test("$nkf -jmB",$example{'mime.base64'},$example{'mime.base64.ans'}); #MIME BASE64 must be LF? @@ -492,8 +642,8 @@ eofeof # Without -l, ISO-8859-1 was handled as X0201. -print "MIME ISO-8859-1 (Q) "; - &test("$nkf -ml",$example{'mime.is8859'},$example{'mime.is8859.ans'}); +printf "%-40s", "MIME ISO-8859-1 (Q)"; + &test("$nkf -jml",$example{'mime.is8859'},$example{'mime.is8859.ans'}); # test for -f is not so simple. @@ -509,7 +659,7 @@ $example{'test_data/cr.ans'} = unpack('u',<<'eofeof'); 7&R1")$8D.21(&RA""G1E5)3 -M2D95:U=#4F)'>6A#/ST*(#T_25-/+3(P,C(M2E`_0C]'>5)#2D8T:UAY4F=* -M1T5K66E2:TI#46M::5%O2D=G8DM%23T_/0HM+2TM"E-U8FIE8W0Z(&%A86$@ -M86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$*(#T_25-/+3(P,C(M2E`_ -M0C]'>5)#2D-):TI#46U*0V=K2VAS;U%G/3T_/2!A86%A(&%A86$@86%A82!A -086%A"B!A86%A"BTM+2T*"@`` -eofeof - -print "test_data/mime_out "; - &test("$nkf -jM",$example{'test_data/mime_out'},$example{'test_data/mime_out.ans'}); -# test_data/mime_out2 - -$example{'test_data/mime_out2'} = unpack('u',<<'eofeof'); -M5&AI&ES=&EN -M9R!M97-S86=E(&AA;F1L:6YG('-O9G1W87)E+@H*4W5B:F5C=#H@=&5S=#$@ -M=&5S=#(@@L2"MX+&@J<@=&5S=#,@@L2"MX+&@O$@=&5S=#0*"E-U8FIE8W0Z -M('1EXSJ()/ZEGN,ZB!! -M4T-)22!!4T-)29/ZEGN,ZB!!4T-)20H*@J`@@J(@@J0@@J8@@J@@@JD@@JL@ -M@JT@@J\@@K$@@K,@@K4@@K<@@KD@@KL@@KT@@K\@@L(@@L0@@L8@@L@@@LD@ -8@LH@@LL@@LP*"@H*"@H*"@H*"@H*"@H* -eofeof - -$example{'test_data/mime_out2.ans'} = unpack('u',<<'eofeof'); -M5&AI5)#1WEH0TE" -M$Q81&AS1WEH0S\]($%30TE)($%30TE)"B`]/TE33RTR,#(R+4I0 -M/T(_1WE20U)N>$Q81&AS1WEH0TE"6A#24)S:U%I47)'>6A#24)S:U%I471'>6A#24)S:U%I -M479'>6A#/ST*(#T_25-/+3(P,C(M2E`_0C])0G-K46E1>$=Y:$-)0G-K46E1 -M>D=Y:$-)0G-K46E1,4=Y:$-)0G-K46E1,T=Y:$,_/0H@/3])4T\M,C`R,BU* -M4#]"/TE"6A#24)S:U%I -M4D5'>6A#24)S:U%I4D='>6A#24)S:U%I4DE'>6A#/ST*(#T_25-/+3(P,C(M -M2E`_0C])0G-K46E22T=Y:$-)0G-K46E23$=Y:$-)0G-K46E234=Y:$-)0G-K -M46E23D=Y:$,_/0H@/3])4T\M,C`R,BU*4#]"/TE"5)#2D-):TI#46U*0V=K2VE1 +M5%X2D1-:TY343-*1&MB2T5)/3\]"B`]/TE33RTR,#(R+4I0/T(_ +M1WE20TI$5)#2D-):TI#46U*0V=K2VE1 +M5%X2D1-:TY343-'>6A#/ST*(#T_25-/+3(P,C(M2E`_0C]'>5)# +M2D1K:T]Y43E*1#AK45-214I%66M30U)+2D5S:U1#4DY*131K5'E24TI&56M7 +M0U)B1WEH0S\]"B`]/TE33RTR,#(R+4I0/T(_1WE20TI&-&M8>5)G2D=%:UEI +M4FM*0U%K6FE1;TI'9V)+14D]/ST*+2TM+0I3=6)J96-T.B!A86%A(&%A86$@ +M86%A82!A86%A(&%A86$@86%A82!A86%A"B`]/TE33RTR,#(R+4I0/T(_1WE2 +M0TI#26M*0U%M2D-G:TMH5)#2D-):TI#46U*0V=K2VE1 +M5%X2D1-:TY343-*1&MK3WE1.4=Y:$,_/0H@/3])4T\M,C`R,BU* +M4#]"/T=Y4D-*1#AK45-214I%66M30U)+2D5S:U1#4DY*131K5'E24TI&56M7 +M0U)B2D8T:UAY4F='>6A#/ST*(#T_25-/+3(P,C(M2E`_0C]'>5)#2D=%:UEI +M4FM*0U%K6FE1;TI'9V)+14D]/ST*+2TM+0I3=6)J96-T.B!A86%A(&%A86$@ +M86%A82!A86%A(&%A86$@86%A82!A86%A(#T_25-/+3(P,C(M2E`_0C]'>5)# +M2D-):TI"5)#2D-9:TM#47%' +@>6A#/ST@86%A82!A86%A(&%A86$@86%A80HM+2TM"@H` +eofeof + +$example{'test_data/mime_out.ans.alt3'} = unpack('u',<<'eofeof'); +M"BTM+2T*4W5B:F5C=#H@86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$@ +M86%A82!A86%A(&%A86$@86%A82!A86%A(&%A86$@86%A80H@86%A80HM+2TM +M"E-U8FIE8W0Z(#T_25-/+3(P,C(M2E`_0C]'>5)#2D-):TI#46U*0V=K2VE1 +M5%X2D1-:TY343-*1&MK3WE1.4=Y:$,_/0H@/3])4T\M,C`R,BU* +M4#]"/T=Y4D-*1#AK45-214I%66M30U)+2D5S:U1#4DY*131K5'E24TI&56M7 +M0U)B2D8T:UAX5)#2D=!:UE3 +M4FE*1U%K2D-2;4I#9VMA0G-O46<]/3\]"BTM+2T*4W5B:F5C=#H@86%A82!A +M86%A(&%A86$@86%A82!A86%A(&%A86$@86%A82`]/TE33RTR,#(R+4I0/T(_ +M1WE20TI#26M*0G-O46<]/3\]"B`]/TE33RTR,#(R+4I0/T(_1WE20TI#66M+ +D0U%Q1WEH0S\](&%A86$@86%A82!A86%A(&%A86$*+2TM+0H*1 +eofeof + +printf "%-40s", "test_data/mime_out"; + &test("$nkf -jM",$example{'test_data/mime_out'},$example{'test_data/mime_out.ans'},$example{'test_data/mime_out.ans.alt'},$example{'test_data/mime_out.ans.alt2'},$example{'test_data/mime_out.ans.alt3'}); +# test_data/mime_out3 + +$example{'test_data/mime_out3'} = "\x82\xD9\x82\xB0 A"; + +$example{'test_data/mime_out3.ans'} = "=?ISO-2022-JP?B?GyRCJFskMhsoQg==?= A"; + +printf "%-40s", "test_data/mime_out3"; + &test("$nkf -jSM",$example{'test_data/mime_out3'},$example{'test_data/mime_out3.ans'}); +# test_data/multi-line $example{'test_data/multi-line'} = unpack('u',<<'eofeof'); MI,JDK*2DI,JDK*2DI,JDK*'!I*2DKJ3GI*:DK*2BI.JDWJ2WI,:AH@"DLZ3L @@ -652,8 +803,28 @@ MHJ3(I,&DY:2FI,>DP:3GI/.DKJ3LI.NDJZ3BI+>D\Z3*I*2AHPJDLZ2SI,_# 8NZ2DN=2AHP`*I+.DLZ3/P[NDI+G4H:,* eofeof -print "test_data/multi-line "; +printf "%-40s", "test_data/multi-line"; &test("$nkf -e",$example{'test_data/multi-line'},$example{'test_data/multi-line.ans'}); +# test_data/-Z4 + +$example{'test_data/-Z4'} = unpack('u',<<'eofeof'); +MH:.AUJ'7H:*AIJ&\H:NAK*6AI:*EHZ6DI:6EIJ6GI:BEJ:6JI:NEK*6MI:ZE +MKZ6PI;&ELJ6SI;2EM:6VI;>EN*6YI;JENZ6\I;VEOJ6_I<"EP:7"I<.EQ*7% +MI<:EQZ7(I&EXJ7CI>2EY:7FI>>EZ*7II>JEZZ7LI>VE[Z7RI?.E]``` +eofeof + +$example{'test_data/-Z4.ans'} = unpack('u',<<'eofeof'); +MCJ&.HHZCCJ2.I8ZPCMZ.WXZGCK&.J(ZRCJF.LXZJCK2.JXZUCK:.MH[>CK>. +MMX[>CKB.N([>CKF.N8[>CKJ.NH[>CKN.NX[>CKR.O([>CKV.O8[>CKZ.OH[> +MCK^.OX[>CL".P([>CL&.P8[>CJ^.PH["CMZ.PX[#CMZ.Q([$CMZ.Q8[&CL>. +MR([)CLJ.RH[>CLJ.WX[+CLN.WH[+CM^.S([,CMZ.S([?CLV.S8[>CLV.WX[. +MCLZ.WH[.CM^.SX[0CM&.TH[3CJR.U(ZMCM6.KH[6CM>.V([9CMJ.VX[&RA""@`` eofeof -print "test_data/non-strict-mime "; +printf "%-40s", "test_data/non-strict-mime"; &test("$nkf -jmN",$example{'test_data/non-strict-mime'},$example{'test_data/non-strict-mime.ans'}); # test_data/q-encode-softrap @@ -715,7 +886,7 @@ $example{'test_data/q-encode-softrap.ans'} = unpack('u',<<'eofeof'); >&R1"-$$[>B4S(3PE221.&RA""ALD0DI1-#D;*$(* eofeof -print "test_data/q-encode-softrap "; +printf "%-40s", "test_data/q-encode-softrap"; &test("$nkf -jmQ",$example{'test_data/q-encode-softrap'},$example{'test_data/q-encode-softrap.ans'}); # test_data/rot13 @@ -735,7 +906,7 @@ M4U-U4U-3>5-34SE355-V4%%?>6%K4WU3.5-54WIY(F-H4V13/5,O4VI31%!2 A&RA""@HE(')P=6(@)W5B='(G('P@87AS("UE"G5B='(* eofeof -print "test_data/rot13 "; +printf "%-40s", "test_data/rot13"; &test("$nkf -jr",$example{'test_data/rot13'},$example{'test_data/rot13.ans'}); # test_data/slash @@ -747,8 +918,8 @@ $example{'test_data/slash.ans'} = unpack('u',<<'eofeof'); 7("`]/U8\5"U5.5=%2RTK.U4J..# +MH>.#O..#J^BAJ.FAC"#HJ:;IJ)/GE*CC@Z'C@[SC@ZOHH:CIH8P@Z*FFZ:B3 +3YY2HXX.AXX.\XX.KZ*&HZ:&,"@`` +eofeof + +$example{'test_data/bugs10904.ans'} = unpack('u',<<'eofeof'); +M4W5B:F5C=#H@/3])4T\M,C`R,BU*4#]"/T=Y4D-/,C0T33`Q4DI716A00U9R +?4U0Q0V%H5)#2E=%:%!#5G)35#%#86AS;U%I06)*14DW +?8FIG>E1616Q94T4X2E=T2E!52G%'>6A#24$]/3\]"@`` +M(#T_25-/+3(P,C(M2E`_0C]'>5)#3S(T-$TP,5)*5T5H4$-65)#6#)98DM%23T_/0H` +eofeof + +printf "%-40s", "test_data/bug19779 "; + &test("$nkf -jM",$example{'test_data/bug19779'},$example{'test_data/bug19779.ans'}); + + if (!NKF) { +printf "%-40s", "Guess NL"; +&command_tests( + "$nkf --guess","none", "ASCII\n", + "$nkf --guess","\n", "ASCII (LF)\n", + "$nkf --guess","\n\n", "ASCII (LF)\n", + "$nkf --guess","\n\r", "ASCII (MIXED NL)\n", + "$nkf --guess","\n\r\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\n.\n", "ASCII (LF)\n", + "$nkf --guess","\n.\r", "ASCII (MIXED NL)\n", + "$nkf --guess","\n.\r\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\r", "ASCII (CR)\n", + "$nkf --guess","\r\r", "ASCII (CR)\n", + "$nkf --guess","\r\r\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\r.\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\r.\r", "ASCII (CR)\n", + "$nkf --guess","\r.\r\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\r\n", "ASCII (CRLF)\n", + "$nkf --guess","\r\n\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\r\n\r", "ASCII (MIXED NL)\n", + "$nkf --guess","\r\n\r\n", "ASCII (CRLF)\n", + "$nkf --guess","\r\n.\n", "ASCII (MIXED NL)\n", + "$nkf --guess","\r\n.\r", "ASCII (MIXED NL)\n", + "$nkf --guess","\r\n.\r\n", "ASCII (CRLF)\n"); + } + +printf "%-40s", "Convert NL to LF"; +&command_tests( + "$nkf -jLu","none", "none", + "$nkf -jLu","\n", "\n", + "$nkf -jLu","\n\n", "\n\n", + "$nkf -jLu","\n\r", "\n\n", + "$nkf -jLu","\n\r\n", "\n\n", + "$nkf -jLu","\n.\n", "\n.\n", + "$nkf -jLu","\n.\r", "\n.\n", + "$nkf -jLu","\n.\r\n", "\n.\n", + "$nkf -jLu","\r", "\n", + "$nkf -jLu","\r\r", "\n\n", + "$nkf -jLu","\r\r\n", "\n\n", + "$nkf -jLu","\r.\n", "\n.\n", + "$nkf -jLu","\r.\r", "\n.\n", + "$nkf -jLu","\r.\r\n", "\n.\n", + "$nkf -jLu","\r\n", "\n", + "$nkf -jLu","\r\n\n", "\n\n", + "$nkf -jLu","\r\n\r", "\n\n", + "$nkf -jLu","\r\n\r\n", "\n\n", + "$nkf -jLu","\r\n.\n", "\n.\n", + "$nkf -jLu","\r\n.\r", "\n.\n", + "$nkf -jLu","\r\n.\r\n", "\n.\n"); + +printf "%-40s", "Convert NL to LF"; +&command_tests( + "$nkf -jLm","none", "none", + "$nkf -jLm","\n", "\r", + "$nkf -jLm","\n\n", "\r\r", + "$nkf -jLm","\n\r", "\r\r", + "$nkf -jLm","\n\r\n", "\r\r", + "$nkf -jLm","\n.\n", "\r.\r", + "$nkf -jLm","\n.\r", "\r.\r", + "$nkf -jLm","\n.\r\n", "\r.\r", + "$nkf -jLm","\r", "\r", + "$nkf -jLm","\r\r", "\r\r", + "$nkf -jLm","\r\r\n", "\r\r", + "$nkf -jLm","\r.\n", "\r.\r", + "$nkf -jLm","\r.\r", "\r.\r", + "$nkf -jLm","\r.\r\n", "\r.\r", + "$nkf -jLm","\r\n", "\r", + "$nkf -jLm","\r\n\n", "\r\r", + "$nkf -jLm","\r\n\r", "\r\r", + "$nkf -jLm","\r\n\r\n", "\r\r", + "$nkf -jLm","\r\n.\n", "\r.\r", + "$nkf -jLm","\r\n.\r", "\r.\r", + "$nkf -jLm","\r\n.\r\n", "\r.\r"); + +printf "%-40s", "Convert NL to CRLF"; +&command_tests( + "$nkf -jLw","none", "none", + "$nkf -jLw","\n", "\r\n", + "$nkf -jLw","\n\n", "\r\n\r\n", + "$nkf -jLw","\n\r", "\r\n\r\n", + "$nkf -jLw","\n\r\n", "\r\n\r\n", + "$nkf -jLw","\n.\n", "\r\n.\r\n", + "$nkf -jLw","\n.\r", "\r\n.\r\n", + "$nkf -jLw","\n.\r\n", "\r\n.\r\n", + "$nkf -jLw","\r", "\r\n", + "$nkf -jLw","\r\r", "\r\n\r\n", + "$nkf -jLw","\r\r\n", "\r\n\r\n", + "$nkf -jLw","\r.\n", "\r\n.\r\n", + "$nkf -jLw","\r.\r", "\r\n.\r\n", + "$nkf -jLw","\r.\r\n", "\r\n.\r\n", + "$nkf -jLw","\r\n", "\r\n", + "$nkf -jLw","\r\n\n", "\r\n\r\n", + "$nkf -jLw","\r\n\r", "\r\n\r\n", + "$nkf -jLw","\r\n\r\r\n", "\r\n\r\n\r\n", + "$nkf -jLw","\r\n.\n", "\r\n.\r\n", + "$nkf -jLw","\r\n.\r", "\r\n.\r\n", + "$nkf -jLw","\r\n.\r\n", "\r\n.\r\n"); # end