#
# nkf test program for nkf-2
#
-# $Id: nkf_test.pl,v 1.31 2008/11/18 21:43:19 naruse Exp $
-#
# Shinji KONO <kono@ie.u-ryukyu.ac.jp>
# Sun Aug 18 12:25:40 JST 1996
# Sun Nov 8 00:16:06 JST 1998
print "UTF8 to U16B...";&test("$nkf --ic=utf-8 --oc=utf-16be-bom", $example{'utf8N'},$example{'u16B'});
print "UTF8 to U16B0..";&test("$nkf --ic=utf-8 --oc=utf-16be", $example{'utf8N'},$example{'u16B0'});
-
+print "UTF8 to UTF8...";&test("$nkf -w","\xf0\xa0\x80\x8b","\xf0\xa0\x80\x8b");
# From JIS
print "UTF8 to EUC ...";&test("$nkf -e",$example{'utf1'},$example{'euc1'});
print "UTF8 to UTF8...";&test("$nkf -w",$example{'utf1'},$example{'utf1'});
-print "\nOther Features\n\n";
+# UTF
+sub h {pack("H*",shift)}
+print "SJIS to -w... ";&test("$nkf -w",h("82A0"),h("E38182"));
+print "SJIS to -w8... ";&test("$nkf -w8",h("82A0"),h("EFBBBFE38182"));
+print "SJIS to -w80... ";&test("$nkf -w80",h("82A0"),h("E38182"));
+print "SJIS to UTF-8... ";&test("$nkf --oc=UTF-8",h("82A0"),h("E38182"));
+print "SJIS to UTF-8N... ";&test("$nkf --oc=UTF-8N",h("82A0"),h("E38182"));
+print "SJIS to UTF-8-BOM... ";&test("$nkf --oc=UTF-8-BOM",h("82A0"),h("EFBBBFE38182"));
+print "SJIS to -w16... ";&test("$nkf -w16",h("82A0"),h("FEFF3042"));
+print "SJIS to UTF-16... ";&test("$nkf --oc=UTF-16",h("82A0"),h("FEFF3042"));
+print "SJIS to -w16B... ";&test("$nkf -w16B",h("82A0"),h("FEFF3042"));
+print "SJIS to -w16B0... ";&test("$nkf -w16B0",h("82A0"),h("3042"));
+print "SJIS to UTF-16BE... ";&test("$nkf --oc=UTF-16BE",h("82A0"),h("3042"));
+print "SJIS to UTF-16BE-BOM...";&test("$nkf --oc=UTF-16BE-BOM",h("82A0"),h("FEFF3042"));
+print "SJIS to -w16L... ";&test("$nkf -w16L",h("82A0"),h("FFFE4230"));
+print "SJIS to -w16L0... ";&test("$nkf -w16L0",h("82A0"),h("4230"));
+print "SJIS to UTF-16LE... ";&test("$nkf --oc=UTF-16LE",h("82A0"),h("4230"));
+print "SJIS to UTF-16LE-BOM...";&test("$nkf --oc=UTF-16LE-BOM",h("82A0"),h("FFFE4230"));
+print "SJIS to -w32... ";&test("$nkf -w32",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to UTF-32... ";&test("$nkf --oc=UTF-32",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to -w32B... ";&test("$nkf -w32B",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to -w32B0... ";&test("$nkf -w32B0",h("82A0"),h("00003042"));
+print "SJIS to UTF-32BE... ";&test("$nkf --oc=UTF-32BE",h("82A0"),h("00003042"));
+print "SJIS to UTF-32BE-BOM...";&test("$nkf --oc=UTF-32BE-BOM",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to -w32L... ";&test("$nkf -w32L",h("82A0"),h("FFFE000042300000"));
+print "SJIS to -w32L0... ";&test("$nkf -w32L0",h("82A0"),h("42300000"));
+print "SJIS to UTF-32LE... ";&test("$nkf --oc=UTF-32LE",h("82A0"),h("42300000"));
+print "SJIS to UTF-32LE-BOM...";&test("$nkf --oc=UTF-32LE-BOM",h("82A0"),h("FFFE000042300000"));
+
+print "\nOther Features\n\n";
# Ambigous Case
$example{'amb'} = unpack('u',<<'eofeof');
printf "%-40s", "CP932 to UTF-16BE :";
&test("$nkf --ic=cp932 --oc=utf-16be",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16_ms'});
-# X0201 \e$B2>L>\e(B
+# X0201 仮名
# X0201->X0208 conversion
# X0208 aphabet -> ASCII
-# X0201 \e$BAj8_JQ49\e(B
+# X0201 相互変換
print "\nX0201 test\n\n";
&test("$nkf -jZ",$example{'x0201.euc'},$example{'x0201.x0208'});
printf "%-40s", "X0201 conversion: UTF8";
&test("$nkf -jZ",$example{'x0201.utf'},$example{'x0201.x0208'});
+printf "%-40s", "-wZ"; &test("$nkf -wZ",
+ "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+ "\xE3\x80\x80aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ0"; &test("$nkf -wZ0",
+ "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+ "\xE3\x80\x80aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ1"; &test("$nkf -wZ1",
+ "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+ " aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ2"; &test("$nkf -wZ2",
+ "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+ " aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ3"; &test("$nkf -wZ3",
+ "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+ "\xE3\x80\x80aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ4"; &test("$nkf -wZ4",
+ "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+ "\xE3\x80\x80aA&\xEF\xBD\xB1");
# -x means X0201 output
printf "%-40s", "X0201 output: SJIS";
&test("$nkf -xs",$example{'x0201.euc'},$example{'x0201.sjis'});
printf "%-40s", "test_data/bugs10904";
&test("$nkf -Mj",$example{'test_data/bugs10904'},$example{'test_data/bugs10904.ans'});
+printf "%-40s", "test_data/ruby-dev:39722";
+ &test("$nkf -Mj",<<eom,<<eom);
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaあ
+eom
+=?US-ASCII?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?=
+ =?US-ASCII?Q?aaaaaaaaaaaaaaaaa?= =?ISO-2022-JP?B?GyRCJCIbKEI=?=
+eom
+# test_data/bug19779
+
+$example{'test_data/bug19779'} = unpack('u',<<'eofeof');
+2&R1","$;*$(*&R1"7V8;*$(*
+eofeof
+
+$example{'test_data/bug19779.ans'} = unpack('u',<<'eofeof');
+M/3])4T\M,C`R,BU*4#]"/T=Y4D--0T5B2T5)/3\]"CT_25-/+3(P,C(M2E`_
+10C]'>5)#6#)98DM%23T_/0H`
+eofeof
+
+printf "%-40s", "test_data/bug19779 ";
+ &test("$nkf -jM",$example{'test_data/bug19779'},$example{'test_data/bug19779.ans'});
+
+printf "%-40s", "[nkf-forum:47327] ";
+ &test("$nkf -wM",pack('H*','feffd852de76d814dc45000a'),"=?UTF-8?B?8KSptvCVgYU=?=\n");
+
+printf "%-40s", "[nkf-forum:47334] ";
+ &test("$nkf -w",pack('H*','feff006100620063000a'),"abc\n");
+
+printf "%-40s", "[nkf-bug:20079] ";
+ &test("$nkf -jSxM","\xBB \xBB","=?ISO-2022-JP?B?GyhJOxsoQiAbKEk7GyhC?=");
+
+printf "%-40s", "[nkf-bug:20079] ";
+ &test("$nkf -SxMw8","\xBB \xBB","=?UTF-8?B?77u/7727IO+9uw==?=");
+
+printf "%-40s", "[nkf-forum:48850] ";
+ &test("$nkf -jSM",
+ "From: \x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0" .
+ " <x-xxxx@xxxxxxxxxxxx.co.jp>\n",
+ "From: =?ISO-2022-JP?B?GyRCJCIkIiQiJCIkIiQiJCIkIiQiGyhC?=" .
+ " <x-xxxx@xxxxxxxxxxxx.co.jp>\n");
+
+printf "%-40s", "[nkf-bug:21393]-x ";
+ &test("$nkf --ic=UTF-8 --oc=CP932",
+ "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1",
+ "\xBC\xDE\xAC\xB0\xCF\xDD\xA5\xCE\xDF\xC3\xC4\xA1");
+
+printf "%-40s", "[nkf-bug:21393]-X ";
+ &test("$nkf --ic=UTF-8 --oc=CP932 -X",
+ "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1",
+ "\x83W\x83\x83\x81[\x83}\x83\x93\x81E\x83|\x83e\x83g\x81B");
+
if (!NKF) {
printf "%-40s", "Guess NL";
&command_tests(