X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=blobdiff_plain;f=nkf_test.pl;h=4e5a1fbb8725c3fb48778d019c0354ddb59faa55;hp=204a4c57f27a38fac53baa2a45fa434c5ccf9afb;hb=5a54b532d970b9d76862ee47b5364bcbc2f82330;hpb=790a8fa7af84fa8e2fc34ea95fa6816ec5498e13 diff --git a/nkf_test.pl b/nkf_test.pl index 204a4c5..4e5a1fb 100644 --- a/nkf_test.pl +++ b/nkf_test.pl @@ -2,8 +2,6 @@ # # nkf test program for nkf-2 # -# $Id: nkf_test.pl,v 1.31 2008/11/18 21:43:19 naruse Exp $ -# # Shinji KONO # Sun Aug 18 12:25:40 JST 1996 # Sun Nov 8 00:16:06 JST 1998 @@ -186,7 +184,7 @@ print "UTF8 to U16L0..";&test("$nkf --ic=utf-8 --oc=utf-16le", $example{'utf8N' print "UTF8 to U16B...";&test("$nkf --ic=utf-8 --oc=utf-16be-bom", $example{'utf8N'},$example{'u16B'}); print "UTF8 to U16B0..";&test("$nkf --ic=utf-8 --oc=utf-16be", $example{'utf8N'},$example{'u16B0'}); - +print "UTF8 to UTF8...";&test("$nkf -w","\xf0\xa0\x80\x8b","\xf0\xa0\x80\x8b"); # From JIS @@ -359,6 +357,10 @@ $example{'jisx0213_jis2004'} = unpack('u',<<'eofeof'); ;&R0H42(O+WU/54]]="A^>1LD*%`A(7YV&RA" eofeof +$example{'jisx0213_utf8'} = unpack('u',<<'eofeof'); +:[[R'Y:REY:V!Y;>+Y;>BZ;ZB\*""B?"JFK(` +eofeof + printf "%-40s", "Shift_JISX0213 to EUC-JISX0213"; &test("$nkf --ic=Shift_JISX0213 --oc=EUC-JISX0213",$example{'jisx0213_sjis'},$example{'jisx0213_euc'}); @@ -373,6 +375,205 @@ printf "%-40s", "ISO-2022-JP-2004 to EUC-JISX0213"; printf "%-40s", "EUC-JISX0213 to ISO-2022-JP-2004"; &test("$nkf --ic=EUC-JISX0213 --oc=ISO-2022-JP-2004",$example{'jisx0213_euc'},$example{'jisx0213_jis2004'}); + +printf "%-40s", "EUC-JISX0213 to UTF-8"; + &test("$nkf --ic=EUC-JISX0213 -w",$example{'jisx0213_euc'},$example{'jisx0213_utf8'}); + +printf "%-40s", "UTF-8 to EUC-JISX0213"; + &test("$nkf -W --oc=EUC-JISX0213",$example{'jisx0213_utf8'},$example{'jisx0213_euc'}); + +printf "%-40s", "ISO-2022-JP-{1,3,2004} to UTF-8"; + &test("$nkf --ic=iso-2022-jp-2004 -w", + "\x1b\$B5Y\x1b\$(O~e\x1b\$(Q.!\x1b\$(P#M\x1b\$(D\\e\x1b(B", + "\xe4\xbc\x91\xe9\xb7\x97\xe4\xbf\xb1\xe5\x8c\x8b\xe8\xa4\xb1"); +printf "%-40s", "UTF-8 to ISO-2022-JP-2004"; + &test("$nkf -W --oc=iso-2022-jp-2004", + "\xe4\xbc\x91\xe9\xb7\x97\xe4\xbf\xb1\xe5\x8c\x8b\xe8\xa4\xb1", + "\x1b\$(Q5Y~e.!\x1b\$(P#M\x1b(B"); + +# test_data/jisx0213nonbmp + +$example{'test_data/jisx0213nonbmp'} = unpack('u',<<'eofeof'); +MKJ*OPJ_,K^"O^\_4S^//[O6Z]?+VJ?:R]N#W[/C^^:GYQ_G4^>[ZW?NS^\G[ +M[/S)_-'^YH^AH8^AJX^AKH^AMH^AQH^A\(^A]X^A^8^CHH^CI8^CIX^CL8^C +MLH^CN(^COX^CP8^CRH^CTH^CTX^CV8^CW(^C]X^DJH^DL8^DLH^DNH^DO8^D +MV8^DW(^DWH^DXX^DZH^DZX^D\H^D](^D]8^EI8^ELH^EOH^EQ(^EQX^EU8^E +MUH^E_H^HL(^HMX^HN(^HNH^HNX^HOX^HP(^HQ8^HR(^HRH^HRX^HVX^HYH^H +M[(^LHH^LJX^LL(^LT(^LY8^L[8^L\H^MI(^MJ8^MJH^MLH^MM(^MM8^MN8^M +MUH^M_8^NHX^NI(^NNH^NO(^NO8^NPH^NPX^NQ(^NQX^NR8^NU8^NUH^NUX^N +MVX^N]X^N^(^OJH^OOX^OP(^OPH^OPX^OSH^OV8^OX8^OZ8^OZH^O\(^O]8_N +MHX_NM(_NR8_NW(_NWH_NWX_NX(_OLH_OQX_OS8_OX8_OY(_PHH_PLX_PN8_P +MTX_P^X_QKH_QL(_QM8_QQ(_QW8_QX8_QYH_QZ8_Q]8_Q]X_Q^H_RH8_RHX_R +MI(_RJ(_RK(_RO8_RR(_RVX_R]8_R]H_SLH_SO8_SOH_SP(_STH_SW8_SWH_S +M\X_S](_S]8_S]X_S^X_S_8_THH_TI(_TIX_TKH_TKX_TM(_TM8_TO8_TPH_T +MSX_TZ8_TZX_T\H_T]8_T^8_UM8_UNH_UQH_UUH_UV(_UVH_UW8_UWX_UXX_U +MZH_U\(_U\X_VQ(_VSH_VW8_V]8_V_H_WH8_WHH_WLX_WMH_WY(_WY8_WZX_W +M[H_W\X_XJ8_XJH_XK(_XM(_XO(_XOH_XPH_XUH_XXX_X]X_X^8_X^H_YI8_Y +MKX_YLH_YN8_YPH_YR(_YV8_YWH_YYH_YZX_Y^H_Y_H_ZH8_ZK(_ZKX_ZSX_Z +MT(_ZUX_ZY8_ZYH_Z\8_Z\H_Z_H_[H8_[K(_[K8_[MH_[MX_[O8_[OH_[SH_[ +MSX_[UX_[VH_[W(_[W8_[X8_[Y8_[YX_[Z8_[\8_\HH_\HX_\N(_\PH_\S(_\ +MUH_\V8_\W8_\]H_]K(_]RX_]S(_]V8_]VX_]W8_]YX_][8_]\(_^I8_^J8_^ +=JX_^LH_^M8_^TX_^V(_^VH_^[H_^\(_^\H_^]@H` +eofeof + +$example{'test_data/jisx0213nonbmp.ans'} = unpack('u',<<'eofeof'); +M\*"`B_"AB+WPH8R;\*&1KO"AHKWPH*Z?\*&:M/"AN+3PHX>$\*.7A/"CG+_P +MHYVC\*.SOO"DG['PI9*.\*64CO"EG;'PI:>$\*6VH?"FJ[_PIKF`\*>#M/"G +MFH3PJ(FW\*B/C?"JAI#PH(*)\*""HO"@@J3PH(:B\*"(D_"@C*OPH(Z!\*"- +ML?"@C[GPH)&*\*"4B?"@EY;PH)BH\*"=C_"@H(?PH*"Z\*"BN?"@I;SPH*:= +M\*"KD_"@K)WPH+6%\*"WH?"@NI7PH+FM\*"YI/"@O9_PH8B!\*&)E?"AB;OP +MH8FT\*&+I/"ABY?PH8N]\*&,MO"AC83PH8^$\*&1K?"AEY?PIK"I\*&9A_"A +MG(;PH9V"\*&G@_"AL9;PH;2M\*&UA?"AM;CPH;6B\*&VH?"AMISPH;:2\*&V +MM_"AMZ#PH;BS\*&\GO"AO;;PH;^Z\**%N_"BC)[PHHZM\**;L_"BH9OPHJ*K +M\**FC_"BJKCPHJV/\**MD/"BK8;PHK"=\**NIO"BL*3PHK>A\*.'@_"CA[7P +MHX:V\*.-LO"CCY/PHX^2\*./D/"CCZ3PHX^5\*./FO"CCY_PHY&*\*.1D?"C +MD8OPHY&E\*.3I/"CE9KPHY:4\*.8N?"CF8?PHYBX\*.8NO"CG)SPHYR,\*.= +MI/"CG[_PHY^G\*.@I/"CH+WPHZJ8\*.QO_"CM(#PH[6`\*.WNO"CM[GPH[>3 +M\*.]OO"D@I;PI(2#\*2'AO"DA[[PI(Z\\*28J?"DFJ7PI**6\*2IC?"DK9;P +MI*VO\*2PEO"DM)3PI+B.\*2XM_"DN:KPI+J+\*6!BO"E@97PI82B\*6&J?"E +MAZ7PI8>-\*6(GO"EB8SPI9"N\*63F?"EEJ?PI9ZI\*6>M/"EIY3PI:ND\*6K +MH_"EJ['PI:ZR\*6QB_"EL:3PI;BN\*6YEO"EN:7PI;FB\*6[F/"ENX+PI;NH +M\*6\H_"EO9SPI;^@\*6_E/"F@(SPI;^[\*:`E_"F@:#PIH.M\*:)L/"FBH;P +MIHV,\*.TCO"FD(+PIIF^\*::L/"FG)WPIJ.=\*:CJO"FI9'PIJ6O\*:GG?"F +MJ)[PIJF8\*:JC/"FJK?PIK&S\*:SG?"FN:7PIKZ4\*:_N/"FO[;PIK^W\*>$ +MC?"GA+GPIX^;\*>/FO"GC[[PIY"0\*>1B?"GF)7PIYB4\*>8L?"GFI/PIYR. +M\*>JA/"GKK/PIZZ^\*>OA_"GLKCPI[:@\*>XD/"GOK?P +MJ(**\*B"N_"HBH+PJ(NS\*B0C/"HD97PJ)6K\*B7B/"HEXGPJ)N7\*B;NO"H +MI8GPJ*6&\*BEJ_"HIH?PJ*:(\*BFNO"HIKOPJ*B>\*BHJ?"HJ;'PJ*F#\*BJ +MF?"HJXWPJ*ND\*BKG?"HKX'PJ*^O\*BTD/"HM;'PJ+>[\*BXG_"HN+;PJ+J) +M\*B[J_"HO++PJ+^X\*F*H/"IBK'PJ9*0\*F7C_"IF;_PJ9NP\*FQ\*J8@O"JF)KPJIJR"@`` +eofeof + +printf "%-40s", "EUC-JISX0213 to UTF-8 (not in BMP)"; + &test("$nkf --ic=euc-jisx0213 -w",$example{'test_data/jisx0213nonbmp'},$example{'test_data/jisx0213nonbmp.ans'}); +printf "%-40s", "UTF-8 to EUC-JISX0213 (not in BMP)"; + &test("$nkf -W --oc=euc-jisx0213",$example{'test_data/jisx0213nonbmp.ans'},$example{'test_data/jisx0213nonbmp'}); + +# test_data/jisx0213needx0213_f + +$example{'test_data/jisx0213needx0213_f'} = unpack('u',<<'eofeof'); +MXH*LPKS#B<.?P['%C<6"Q)C$C<6OR:[%B\63RJ+)FLN0RZ;+GN*=O^*%M^*3 +MFN.+DN.+G^*8GO"@@(OE@(+EA(OCDYOEC:'EC:/EEH;OJ+CEG+/EHJGEI9WE +MB9WFD[?FF8CFFJ#FGKOFH;+CKK;GI(#OJ8WGIKCOJ97GKYGGL:WGM9SGN8?H +M@+?PIJN_Z(VBZ(ZGZ(^1Z)2CZ)F;Z)FLZ*"?[ZFA[ZFB[ZFD\*B)M^F"F>F$ +ME>F$I^>J@N>JN>>MI.>ML^>OL.>RIN>SM>2+G>>VI^>ZD>>]DO"CM([H@([P +MIIJPZ(2>\*:CG>B)B^^IG>B*M.B.E.B0C^B1O.B4F^B5D?"FO[;HF:_HFZ;H +MG+KHG;+HH(OPIYB4Z*.2Z*6%Y)JAZ*BUY)R,\*>NOO"GMJ#HM(GHN:SDH8[H +MOK;HO['I@K#IA8;IA9GIB9'PJ*:(Z8N&Z8N[Z8V:Z9")Z96XZ9J]Z9N:Z9Z6 +MZ:"EZ:*\Z:.QZ:6`Z:B@Z:BQZ:NE\*FXO>FOKO"INZGIL:GPJH":Z;:9Z;B" +-\**(F/"JE['PJIJR"@`` +eofeof + +$example{'test_data/jisx0213needx0213_f.ans'} = unpack('u',<<'eofeof'); +MJ:&ILZG`J=6IYZG^JJZJOJK,JM6JZZKZJZJKO:O#J]6KX:OQK*JLO*S+K-VL +MZJW^KJ*NO*[,KMNN[Z[PKZJOMJ_%K]^OZ:_^]:3UN/7$]=CUX_7^^:WYM/G$ +M^='YYOGP^J3ZM/K(^MWZ[?KY^Z'[MOO.^]+[Y_OY_*_\N/S)_-W\[/SPC_.M +MC_.TC_/.C_/1C_/CC_/VC_2HC_2[C_3$C_3>C_3JC_3UC_6EC_6ZC_7!C_76 +MC_7KC_7TC_:BC_:WC_;!C_;2C_;FC_;PC_>AC_>[C_?.C_?:C_?BC_?\C_BJ +MC_BSC_C`C_C.C_C8C_CKC_CYC_FOC_FSC_G/C_GUO[7#M=^U][7[MA.V-[8_MGNV?[:OMKNVO[;[M +MO^W"[<[MS^W0[=[MW^WL[>[M[^W[[?SN1>Y.[D_N4>Y>[E_N;>YO[GWN?NZ' +F[HWNC^Z0[I[NG^ZF[J[NK^ZV[K_NQ^[.[L_NT.[?[NSN[^[[[OP` +eofeof + +$example{'test_data/shift_jisx0213-utf8-need-no-cp932.ans'} = unpack('u',<<'eofeof'); +MYZ&#YZ2>HN>>IK>^IE>>KJ^>MCN>MH.^IEN2) +MI.>QF>>TL>>U@>>WH^>XB.>XD>>_G^>_K.^IF^B%H.B%I^B%J.B*H^B*I.B, +MHNB,NNB-@^B/A^B/C^B0BNB2M.B2NNB3@NB6HNB6L.B9F^B9HNB>K>B>M>B@ +MG^BCM>^IH.BDF.BHE>BHHNBIN>^IH^BMAN^II.BWCO"HB;?HO93PJ(^-Z+ZF +/Z8*^Z82BZ82OZ8>[Z8>D +eofeof + +printf "%-40s", "test_data/shift_jisx0213-utf8-need-no-cp932 "; + &test("$nkf --ic=shift_jisx0213 -w",$example{'test_data/shift_jisx0213-utf8-need-no-cp932'},$example{'test_data/shift_jisx0213-utf8-need-no-cp932.ans'}); + +# jisx0213conflict-ibmext +$example{'shift_jisx0213conflict-ibmext'} = "\x87\x40\xed\x40\xee\xf6\xfa\x52\xfb\x45\xfb\xfc\xfc\x4b"; +$example{'shift_jisx0213conflict-ibmext.x0213utf8'} = "\xe2\x91\xa0\xe7\xa1\x83\xe9\x86\x9e\xe8\xb4\x89\xe9\x8c\x8d\xe9\xa8\xa0\xf0\xa9\xa9\xb2"; +$example{'shift_jisx0213conflict-ibmext.cp932utf8'} = "\xe2\x91\xa0\xe7\xba\x8a\xe2\x85\xb7\xe2\x85\xa8\xe6\xb7\xbc\xe9\xab\x99\xe9\xbb\x91"; + +printf "%-40s", "Shift_JISX0213 to UTF-8 (ibmext etc)"; + &test("$nkf --ic=shift_jisx0213 -w", + $example{'shift_jisx0213conflict-ibmext'}, + $example{'shift_jisx0213conflict-ibmext.x0213utf8'}); + +printf "%-40s", "CP932 to UTF-8 (ibmext etc)"; + &test("$nkf --ic=cp932 -w", + $example{'shift_jisx0213conflict-ibmext'}, + $example{'shift_jisx0213conflict-ibmext.cp932utf8'}); + +printf "%-40s", "UTF-8 to Shift_JISX0213 (ibmext etc)"; + &test("$nkf --oc=shift_jisx0213 -W", + $example{'shift_jisx0213conflict-ibmext.x0213utf8'}, + $example{'shift_jisx0213conflict-ibmext'}); + +printf "%-40s", "UTF-8 to CP932 (ibmext etc)"; + &test("$nkf --oc=cp932 -W --cp932inv", + $example{'shift_jisx0213conflict-ibmext.cp932utf8'}, + "\x87\x40\xfa\x5c\xfa\x47\x87\x5c\xfb\x45\xfb\xfc\xfc\x4b"); +# test_data/jisx0213utf8comb + +$example{'test_data/jisx0213utf8comb'} = unpack('u',<<'eofeof'); +MI/>D^*3YI/JD^Z7WI?BE^:7ZI?NE_*7]I?ZF^*O$J\BKR:O*J\NKS*O-J\ZK +.SZOEJ^:KVJOWXX*:PZ;,@,F4 +MS(#)E,R!RHS,@,J,S(')FD^*3YI/JD^Z7WI?BE^:7ZI?NE_*7]I?ZF^*O$J\BKR:O*J\NKS*O-J\ZK +,SZOEJ^:KVJOWXX*:PZ;,@,F4 +MS(#)E,R!RHS,@,J,S(')FL>(B +# X0201 仮名 # X0201->X0208 conversion # X0208 aphabet -> ASCII -# X0201 $BAj8_JQ49(B +# X0201 相互変換 print "\nX0201 test\n\n"; @@ -502,6 +703,24 @@ printf "%-40s", "X0201 conversion: EUC"; &test("$nkf -jZ",$example{'x0201.euc'},$example{'x0201.x0208'}); printf "%-40s", "X0201 conversion: UTF8"; &test("$nkf -jZ",$example{'x0201.utf'},$example{'x0201.x0208'}); +printf "%-40s", "-wZ"; &test("$nkf -wZ", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ0"; &test("$nkf -wZ0", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ1"; &test("$nkf -wZ1", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ2"; &test("$nkf -wZ2", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + " aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ3"; &test("$nkf -wZ3", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xE3\x82\xA2"); +printf "%-40s", "-wZ4"; &test("$nkf -wZ4", + "\xE3\x80\x80\xEF\xBD\x81\xEF\xBC\xA1&\xE3\x82\xA2", + "\xE3\x80\x80aA&\xEF\xBD\xB1"); # -x means X0201 output printf "%-40s", "X0201 output: SJIS"; &test("$nkf -xs",$example{'x0201.euc'},$example{'x0201.sjis'}); @@ -511,6 +730,80 @@ printf "%-40s", "X0201 output: EUC"; &test("$nkf -xe",$example{'x0201.jis'},$example{'x0201.euc'}); printf "%-40s", "X0201 output: UTF8"; &test("$nkf -xw",$example{'x0201.jis'},$example{'x0201.utf'}); +# test_data/x0201jis= + +$example{'test_data/x0201jis=.ans'} = unpack('u',<<'eofeof'); +MH:*CI*6FIZBIJJNLK:ZOL+&RL[2UMK>XN;J[O+V^O\#!PL/$Q<;'R,G*R\S- +2SL_0T=+3U-76U]C9VMO?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\x1b(B", + $example{'test_data/x0201jis=.ans'}); +# test_data/Xx0213 + +$example{'test_data/Xx0213'} = unpack('u',<<'eofeof'); +M[[VV[[Z?[[VW[[Z?[[VX[[Z?[[VY[[Z?[[VZ[[Z?[[V^[[Z?[[Z"[[Z?[[Z$ +*[[Z?XX>WXX*:"@`` +eofeof + +$example{'test_data/Xx0213.ans'} = unpack('u',<<'eofeof'); +3I?>E^*7YI?JE^Z7\I?VE_J;X"@`` +eofeof + +printf "%-40s", "test_data/Xx0213 "; + &test("$nkf -X -W --oc=euc-jisx0213",$example{'test_data/Xx0213'},$example{'test_data/Xx0213.ans'}); +# test_data/xx0213 + +$example{'test_data/xx0213'} = unpack('u',<<'eofeof'); +M[[VV[[Z?[[VW[[Z?[[VX[[Z?[[VY[[Z?[[VZ[[Z?[[V^[[Z?[[Z"[[Z?[[Z$ +*[[Z?XX>WXX*:"@`` +eofeof + +$example{'test_data/xx0213.ans'} = unpack('u',<<'eofeof'); +CCK:.WXZWCM^.N([?CKF.WXZZCM^.OH[?CL*.WX[$CM^F^`H` +eofeof + +printf "%-40s", "test_data/xx0213 "; + &test("$nkf -x -W --oc=euc-jisx0213",$example{'test_data/xx0213'},$example{'test_data/xx0213.ans'}); +# test_data/Z4x0213 + +$example{'test_data/Z4x0213'} = unpack('u',<<'eofeof'); +3I?>E^*7YI?JE^Z7\I?VE_J;X"@`` +eofeof + +$example{'test_data/Z4x0213.ans'} = unpack('u',<<'eofeof'); +M[[VV[[Z?[[VW[[Z?[[VX[[Z?[[VY[[Z?[[VZ[[Z?[[V^[[Z?[[Z"[[Z?[[Z$ +*[[Z?XX>WXX*:"@`` +eofeof + +printf "%-40s", "test_data/Z4x0213 "; + &test("$nkf -Z4 --ic=euc-jisx0213 -w",$example{'test_data/Z4x0213'},$example{'test_data/Z4x0213.ans'}); +# test_data/Z4comb + +$example{'test_data/Z4comb'} = unpack('u',<<'eofeof'); +MXX""XX",XX"-XX"!XX.[XX.\XX*;XX*XX.?XX.@XX.AXX.BXX.C +MXX.DXX.EXX.FXX.GXX.HXX.IXX.JXX.KXX.LXX.MXX.OXX.RXX.SXX.TXX*K +MXX*:XX*MXX*:XX*OXX*:XX*QXX*:XX*SXX*:XX*[XX*:XX.$XX*:XX.(XX*: +-XX>WXX*:XX*BXX*9"@`` +eofeof + +$example{'test_data/Z4comb.ans'} = unpack('u',<<'eofeof'); +MCJ&.HHZCCJ2.I8ZPCMZ.WXZGCK&.J(ZRCJF.LXZJCK2.JXZUCK:.MH[>CK>. +MMX[>CKB.N([>CKF.N8[>CKJ.NH[>CKN.NX[>CKR.O([>CKV.O8[>CKZ.OH[> +MCK^.OX[>CL".P([>CL&.P8[>CJ^.PH["CMZ.PX[#CMZ.Q([$CMZ.Q8[&CL>. +MR([)CLJ.RH[>CLJ.WX[+CLN.WH[+CM^.S([,CMZ.S([?CLV.S8[>CLV.WX[. +MCLZ.WH[.CM^.SX[0CM&.TH[3CJR.U(ZMCM6.KH[6CM>.V([9CMJ.VX[.WXZXCM^.N8[?CKJ.WXZ^CM^.PH[?CL2.WZ;XCK&.W@H` +eofeof + +printf "%-40s", "test_data/Z4comb "; + &test("$nkf -Z4 -W --oc=euc-jisx0213",$example{'test_data/Z4comb'},$example{'test_data/Z4comb.ans'}); # MIME decode @@ -870,6 +1163,20 @@ eofeof printf "%-40s", "test_data/q-encode-softrap"; &test("$nkf -jmQ",$example{'test_data/q-encode-softrap'},$example{'test_data/q-encode-softrap.ans'}); +# test_data/q-encode-utf-8 + +$example{'test_data/q-encode-utf-8'} = <<'eofeof'; +=?utf-8?Q?=E3=81=82=E3=81=84=E3=81=86=E3=81=88=E3=81=8A?= +=?utf-8?Q?=E3=81=8B=E3=81=8D=E3=81=8F=E3=81=91=E3=81=93?= +eofeof + +$example{'test_data/q-encode-utf-8.ans'} = <<"eofeof"; +\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A +\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F\xE3\x81\x91\xE3\x81\x93 +eofeof + +printf "%-40s", "test_data/q-encode-utf-8"; + &test("$nkf -w",$example{'test_data/q-encode-utf-8'},$example{'test_data/q-encode-utf-8.ans'}); # test_data/rot13 $example{'test_data/rot13'} = unpack('u',<<'eofeof'); @@ -986,6 +1293,82 @@ eofeof printf "%-40s", "test_data/bugs10904"; &test("$nkf -Mj",$example{'test_data/bugs10904'},$example{'test_data/bugs10904.ans'}); +printf "%-40s", "test_data/ruby-dev:39722"; + &test("$nkf -MjW",<5)#6#)98DM%23T_/0H` +eofeof + +printf "%-40s", "test_data/bug19779 "; + &test("$nkf -jM",$example{'test_data/bug19779'},$example{'test_data/bug19779.ans'}); + +printf "%-40s", "[nkf-forum:47327] "; + &test("$nkf -wM",pack('H*','feffd852de76d814dc45000a'),"=?UTF-8?B?8KSptvCVgYU=?=\n"); + +printf "%-40s", "[nkf-forum:47334] "; + &test("$nkf -w",pack('H*','feff006100620063000a'),"abc\n"); + +printf "%-40s", "[nkf-bug:20079] "; + &test("$nkf -jSxM","\xBB \xBB","=?ISO-2022-JP?B?GyhJOxsoQiAbKEk7GyhC?="); + +printf "%-40s", "[nkf-bug:20079] "; + &test("$nkf -SxMw8","\xBB \xBB","=?UTF-8?B?77u/7727IO+9uw==?="); + +printf "%-40s", "[nkf-forum:48850] "; + &test("$nkf -jSM", + "From: \x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0\x82\xA0" . + " \n", + "From: =?ISO-2022-JP?B?GyRCJCIkIiQiJCIkIiQiJCIkIiQiGyhC?=" . + " \n"); + +printf "%-40s", "[nkf-bug:21393]-x "; + &test("$nkf --ic=UTF-8 --oc=CP932", + "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1", + "\xBC\xDE\xAC\xB0\xCF\xDD\xA5\xCE\xDF\xC3\xC4\xA1"); + +printf "%-40s", "[nkf-bug:21393]-X "; + &test("$nkf --ic=UTF-8 --oc=CP932 -X", + "\xEF\xBD\xBC\xEF\xBE\x9E\xEF\xBD\xAC\xEF\xBD\xB0\xEF\xBE\x8F\xEF\xBE\x9D\xEF\xBD\xA5\xEF\xBE\x8E\xEF\xBE\x9F\xEF\xBE\x83\xEF\xBE\x84\xEF\xBD\xA1", + "\x83W\x83\x83\x81[\x83}\x83\x93\x81E\x83|\x83e\x83g\x81B"); + +printf "%-40s", "[nkf-forum:65316] "; + &test("$nkf -xwW -f10", + "\xEF\xBD\xB1\xEF\xBD\xB2\xEF\xBD\xB3\xEF\xBD\xB4\xEF\xBD\xB5\xEF\xBD\xB6\xEF\xBD\xB7\xEF\xBD\xB8\xEF\xBD\xB9\xEF\xBD\xBA\xEF\xBD\xBB\xEF\xBD\xBC\xEF\xBD\xBD\xEF\xBD\xBE\xEF\xBD\xBF\xEF\xBE\x80\xEF\xBE\x81\xEF\xBE\x82\xEF\xBE\x83\xEF\xBE\x84", + "\xEF\xBD\xB1\xEF\xBD\xB2\xEF\xBD\xB3\xEF\xBD\xB4\xEF\xBD\xB5\xEF\xBD\xB6\xEF\xBD\xB7\xEF\xBD\xB8\xEF\xBD\xB9\xEF\xBD\xBA\n\xEF\xBD\xBB\xEF\xBD\xBC\xEF\xBD\xBD\xEF\xBD\xBE\xEF\xBD\xBF\xEF\xBE\x80\xEF\xBE\x81\xEF\xBE\x82\xEF\xBE\x83\xEF\xBE\x84\n"); + +printf "%-40s", "[nkf-forum:65482] "; + &test("$nkf --ic=CP50221 --oc=CP932", + "\x1b\x24\x42\x7f\x21\x80\x21\x1b\x28\x42\n", + "\xf0\x40\xf0\x9f\x0a"); + +# [KNOWNBUG] +# printf "%-40s", "[ruby-dev:47057] "; +# &test("$nkf -jW -M --cp932", +# "\xe3\x80\x8c\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82 by \xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82\xe3\x80\x8d\xe3\x81\xae\xe3\x83\xac\xe3\x82\xb7\xe3\x83\x94", +# "=?ISO-2022-JP?B?GyRCIVYkIiQiJCIkIiQiJCIkIiQiJCIkIiQiGyhC?= by\n =?ISO-2022-JP?B?GyRCJCIkIiQiJCIkIiQiJCIkIiQiJCIhVyROJWwlNyVUGyhC?="); + +printf "%-40s", "[nkf-bug:32328] SJIS"; + &test("$nkf -Sw", + "\x1b\x82\xa0", + "\x1b\xe3\x81\x82"); + +printf "%-40s", "[nkf-bug:32328] JIS"; + &test("$nkf -Jw", + "\x1b\x1b\$B\$\x22\x1b(B", + "\x1b\xe3\x81\x82"); + if (!NKF) { printf "%-40s", "Guess NL"; &command_tests(