OSDN Git Service

Release 2.1.1.
[nkf/nkf.git] / nkf_test.pl
index 9b0228d..30b754b 100644 (file)
@@ -2,8 +2,6 @@
 #
 # nkf test program for nkf-2
 #
-# $Id: nkf_test.pl,v 1.31 2008/11/18 21:43:19 naruse Exp $
-#
 #    Shinji KONO <kono@ie.u-ryukyu.ac.jp>
 # Sun Aug 18 12:25:40 JST 1996
 # Sun Nov  8 00:16:06 JST 1998
@@ -186,7 +184,7 @@ print "UTF8 to U16L0..";&test("$nkf --ic=utf-8 --oc=utf-16le",              $example{'utf8N'
 print "UTF8 to U16B...";&test("$nkf --ic=utf-8 --oc=utf-16be-bom",     $example{'utf8N'},$example{'u16B'});
 print "UTF8 to U16B0..";&test("$nkf --ic=utf-8 --oc=utf-16be",         $example{'utf8N'},$example{'u16B0'});
 
-
+print "UTF8 to UTF8...";&test("$nkf -w","\xf0\xa0\x80\x8b","\xf0\xa0\x80\x8b");  # 4-byte UTF-8 sequence (U+2000B); expected output is identical to the input
 
 # From JIS
 
@@ -216,8 +214,37 @@ print "UTF8 to SJIS...";&test("$nkf -s",$example{'utf1'},$example{'sjis1'});
 print "UTF8 to EUC ...";&test("$nkf -e",$example{'utf1'},$example{'euc1'});
 print "UTF8 to UTF8...";&test("$nkf -w",$example{'utf1'},$example{'utf1'});
 
-print "\nOther Features\n\n";
+# UTF output options: every test below feeds the same two input bytes (hex 82A0) and checks the exact bytes, including any BOM, produced by each -w* / --oc=UTF-* option
+sub h {pack("H*",shift)}  # helper: convert a string of hex digits into the raw bytes it spells
+print "SJIS to -w...          ";&test("$nkf -w",h("82A0"),h("E38182"));
+print "SJIS to -w8...         ";&test("$nkf -w8",h("82A0"),h("EFBBBFE38182"));
+print "SJIS to -w80...        ";&test("$nkf -w80",h("82A0"),h("E38182"));
+print "SJIS to UTF-8...       ";&test("$nkf --oc=UTF-8",h("82A0"),h("E38182"));
+print "SJIS to UTF-8N...      ";&test("$nkf --oc=UTF-8N",h("82A0"),h("E38182"));
+print "SJIS to UTF-8-BOM...   ";&test("$nkf --oc=UTF-8-BOM",h("82A0"),h("EFBBBFE38182"));
+print "SJIS to -w16...        ";&test("$nkf -w16",h("82A0"),h("FEFF3042"));
+print "SJIS to UTF-16...      ";&test("$nkf --oc=UTF-16",h("82A0"),h("FEFF3042"));
+print "SJIS to -w16B...       ";&test("$nkf -w16B",h("82A0"),h("FEFF3042"));
+print "SJIS to -w16B0...      ";&test("$nkf -w16B0",h("82A0"),h("3042"));
+print "SJIS to UTF-16BE...    ";&test("$nkf --oc=UTF-16BE",h("82A0"),h("3042"));
+print "SJIS to UTF-16BE-BOM...";&test("$nkf --oc=UTF-16BE-BOM",h("82A0"),h("FEFF3042"));
+print "SJIS to -w16L...       ";&test("$nkf -w16L",h("82A0"),h("FFFE4230"));
+print "SJIS to -w16L0...      ";&test("$nkf -w16L0",h("82A0"),h("4230"));
+print "SJIS to UTF-16LE...    ";&test("$nkf --oc=UTF-16LE",h("82A0"),h("4230"));
+print "SJIS to UTF-16LE-BOM...";&test("$nkf --oc=UTF-16LE-BOM",h("82A0"),h("FFFE4230"));
+print "SJIS to -w32...        ";&test("$nkf -w32",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to UTF-32...      ";&test("$nkf --oc=UTF-32",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to -w32B...       ";&test("$nkf -w32B",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to -w32B0...      ";&test("$nkf -w32B0",h("82A0"),h("00003042"));
+print "SJIS to UTF-32BE...    ";&test("$nkf --oc=UTF-32BE",h("82A0"),h("00003042"));
+print "SJIS to UTF-32BE-BOM...";&test("$nkf --oc=UTF-32BE-BOM",h("82A0"),h("0000FEFF00003042"));
+print "SJIS to -w32L...       ";&test("$nkf -w32L",h("82A0"),h("FFFE000042300000"));
+print "SJIS to -w32L0...      ";&test("$nkf -w32L0",h("82A0"),h("42300000"));
+print "SJIS to UTF-32LE...    ";&test("$nkf --oc=UTF-32LE",h("82A0"),h("42300000"));
+print "SJIS to UTF-32LE-BOM...";&test("$nkf --oc=UTF-32LE-BOM",h("82A0"),h("FFFE000042300000"));
 
+
+print "\nOther Features\n\n";
 # Ambiguous Case
 
 $example{'amb'} = unpack('u',<<'eofeof');
@@ -399,10 +426,10 @@ printf "%-40s", "Microsoft UCS Mapping :";
 printf "%-40s", "CP932 to UTF-16BE :";
     &test("$nkf --ic=cp932 --oc=utf-16be",$example{'ms_ucs_map_1_sjis'},$example{'ms_ucs_map_1_utf16_ms'});
 
-# X0201 \e$B2>L>\e(B
+# X0201 仮名
 # X0201->X0208 conversion
 # X0208 alphabet -> ASCII
-# X0201 \e$BAj8_JQ49\e(B
+# X0201 相互変換
 
 print "\nX0201 test\n\n";
 
@@ -473,6 +500,24 @@ printf "%-40s", "X0201 conversion: EUC";
     &test("$nkf -jZ",$example{'x0201.euc'},$example{'x0201.x0208'});
 printf "%-40s", "X0201 conversion: UTF8";
     &test("$nkf -jZ",$example{'x0201.utf'},$example{'x0201.x0208'});
+printf "%-40s", "-wZ"; &test("$nkf -wZ",  # input: U+3000, fullwidth a, fullwidth A, '&', katakana U+30A2; expects only the fullwidth letters turned into ASCII
+       "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+       "\xE3\x80\x80aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ0"; &test("$nkf -wZ0",  # same expected output as plain -wZ
+       "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+       "\xE3\x80\x80aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ1"; &test("$nkf -wZ1",  # additionally expects U+3000 replaced by one ASCII space
+       "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+       " aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ2"; &test("$nkf -wZ2",  # additionally expects U+3000 replaced by two ASCII spaces
+       "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+       "  aA&\xE3\x82\xA2");
+printf "%-40s", "-wZ3"; &test("$nkf -wZ3",  # additionally expects '&' written as the entity &amp; (U+3000 kept)
+       "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+       "\xE3\x80\x80aA&amp;\xE3\x82\xA2");
+printf "%-40s", "-wZ4"; &test("$nkf -wZ4",  # additionally expects katakana U+30A2 replaced by halfwidth U+FF71 (U+3000 and '&' kept)
+       "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2",
+       "\xE3\x80\x80aA&\xEF\xBD\xB1");
 # -x means X0201 output
 printf "%-40s", "X0201 output: SJIS";
     &test("$nkf -xs",$example{'x0201.euc'},$example{'x0201.sjis'});
@@ -957,6 +1002,56 @@ eofeof
 printf "%-40s", "test_data/bugs10904";
     &test("$nkf -Mj",$example{'test_data/bugs10904'},$example{'test_data/bugs10904.ans'});
 
+printf "%-40s", "test_data/ruby-dev:39722";
+    &test("$nkf -Mj",<<eom,<<eom);
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaあ
+eom
+=?US-ASCII?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?=
+ =?US-ASCII?Q?aaaaaaaaaaaaaaaaa?= =?ISO-2022-JP?B?GyRCJCIbKEI=?=
+eom
+# test_data/bug19779
+
+$example{'test_data/bug19779'} = unpack('u',<<'eofeof');
+2&R1","$;*$(*&R1"7V8;*$(*
+eofeof
+
+$example{'test_data/bug19779.ans'} = unpack('u',<<'eofeof');
+M/3])4T\M,C`R,BU*4#]"/T=Y4D--0T5B2T5)/3\]"CT_25-/+3(P,C(M2E`_
+10C]'>5)#6#)98DM%23T_/0H`
+eofeof
+
+printf "%-40s",  "test_data/bug19779    ";
+    &test("$nkf -jM",$example{'test_data/bug19779'},$example{'test_data/bug19779.ans'});
+
+printf "%-40s",  "[nkf-forum:47327]    ";
+    &test("$nkf -wM",pack('H*','feffd852de76d814dc45000a'),"=?UTF-8?B?8KSptvCVgYU=?=\n");  # input: FEFF BOM, surrogate pairs D852 DE76 and D814 DC45, then 000A; expects one UTF-8 B-encoded MIME word
+
+printf "%-40s",  "[nkf-forum:47334]    ";
+    &test("$nkf -w",pack('H*','feff006100620063000a'),"abc\n");  # input: FEFF BOM followed by 16-bit big-endian "abc\n", no input-encoding option given; expects plain "abc\n"
+
+printf "%-40s",  "[nkf-bug:20079]    ";
+    &test("$nkf -jSxM","\xBB \xBB","=?ISO-2022-JP?B?GyhJOxsoQiAbKEk7GyhC?=");
+
+printf "%-40s",  "[nkf-bug:20079]    ";
+    &test("$nkf -SxMw8","\xBB \xBB","=?UTF-8?B?77u/7727IO+9uw==?=");
+
+printf "%-40s",  "[nkf-forum:48850]    ";
+    &test("$nkf -jSM",
+       "From: \x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0" .
+       " <x-xxxx@xxxxxxxxxxxx.co.jp>\n",
+       "From: =?ISO-2022-JP?B?GyRCJCIkIiQiJCIkIiQiJCIkIiQiGyhC?=" .
+       " <x-xxxx@xxxxxxxxxxxx.co.jp>\n");
+
+printf "%-40s",  "[nkf-bug:21393]-x  ";
+    &test("$nkf --ic=UTF-8 --oc=CP932",
+    "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1",
+    "\xBC\xDE\xAC\xB0\xCF\xDD\xA5\xCE\xDF\xC3\xC4\xA1");
+
+printf "%-40s",  "[nkf-bug:21393]-X  ";
+    &test("$nkf --ic=UTF-8 --oc=CP932 -X",
+    "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1",
+    "\x83W\x83\x83\x81[\x83}\x83\x93\x81E\x83|\x83e\x83g\x81B");
+
     if (!NKF) {
 printf "%-40s", "Guess NL";
 &command_tests(