* \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-#define NKF_IDENT "$Id: nkf.c,v 1.173 2008/02/06 22:14:13 naruse Exp $"
+#define NKF_IDENT "$Id: nkf.c,v 1.174 2008/02/07 19:25:29 naruse Exp $"
#define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2008-02-07"
#define COPY_RIGHT \
CP50222,
ISO_2022_JP_1,
ISO_2022_JP_3,
+ ISO_2022_JP_2004,
SHIFT_JIS,
WINDOWS_31J,
CP10001,
EUC_JP,
+ EUCJP_NKF,
CP51932,
EUCJP_MS,
EUCJP_ASCII,
UTF_32BE_BOM,
UTF_32LE,
UTF_32LE_BOM,
+ BINARY,
NKF_ENCODING_TABLE_SIZE,
JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
/* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
/* JIS_X_0213_2000_1 = 0x1228, */ /* O */
JIS_X_0213_2 = 0x1229, /* P */
JIS_X_0213_1 = 0x1233, /* Q */
- BINARY
};
nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
{CP50222, "CP50222", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
{ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
+ {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
{SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
{WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
{CP10001, "CP10001", &NkfEncodingShift_JIS},
{EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
+ {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
{CP51932, "CP51932", &NkfEncodingEUC_JP},
{EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
{EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
{"CP50222", CP50222},
{"ISO-2022-JP-1", ISO_2022_JP_1},
{"ISO-2022-JP-3", ISO_2022_JP_3},
+ {"ISO-2022-JP-2004", ISO_2022_JP_2004},
{"SHIFT_JIS", SHIFT_JIS},
{"SJIS", SHIFT_JIS},
{"WINDOWS-31J", WINDOWS_31J},
{"CP10001", CP10001},
{"EUCJP", EUC_JP},
{"EUC-JP", EUC_JP},
+ {"EUCJP-NKF", EUCJP_NKF},
{"CP51932", CP51932},
{"EUC-JP-MS", EUCJP_MS},
{"EUCJP-MS", EUCJP_MS},
/* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
#endif /* SHIFTJIS_CP932 */
-#ifdef X0212_ENABLE
static int x0212_f = FALSE;
-#endif
static int x0213_f = FALSE;
static unsigned char prefix_table[256];
switch (nkf_enc_to_index(enc)) {
case ISO_8859_1:
iso8859_f = TRUE;
+ break;
case CP50220:
case CP50221:
case CP50222:
#endif
break;
case ISO_2022_JP_1:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
break;
case ISO_2022_JP_3:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
+ x0213_f = TRUE;
+ break;
+ case ISO_2022_JP_2004:
+ x0212_f = TRUE;
x0213_f = TRUE;
break;
case SHIFT_JIS:
ms_ucs_map_f = UCS_MAP_CP932;
#endif
break;
- case EUC_JP:
break;
case CP10001:
#ifdef SHIFTJIS_CP932
ms_ucs_map_f = UCS_MAP_CP10001;
#endif
break;
+ case EUC_JP:
+ break;
+ case EUCJP_NKF:
+ break;
case CP51932:
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
#endif
break;
case ISO_2022_JP_1:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
#ifdef SHIFTJIS_CP932
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
break;
case ISO_2022_JP_3:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
- ms_ucs_map_f = UCS_MAP_CP932;
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ break;
+ case EUCJP_NKF:
+ x0212_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_ASCII;
#endif
break;
case CP51932:
#endif
break;
case EUCJP_MS:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_MS;
#endif
break;
case EUCJP_ASCII:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
break;
case EUC_JISX0213:
case EUC_JIS_2004:
-#ifdef X0212_ENABLE
x0212_f = TRUE;
-#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
if (cp932inv_f == TRUE) cp932inv_f = FALSE;
c2 |= PREFIX_EUCG3;
}
if (c2 == SO) c2 = JIS_X_0201_1976_K;
- c1 = val & 0x7f;
+ c1 = val & 0xFF;
if (p2) *p2 = c2;
if (p1) *p1 = c1;
return 0;
(*o_putc)(ESC);
(*o_putc)('$');
(*o_putc)('(');
- (*o_putc)('O'); /* TODO */
+ (*o_putc)('Q');
break;
case JIS_X_0213_2:
(*o_putc)(ESC);
void e_oconv(nkf_char c2, nkf_char c1)
{
-#ifdef NUMCHAR_OPTION
if (c2 == 0 && nkf_char_unicode_p(c1)){
w16e_conv(c1, &c2, &c1);
if (c2 == 0 && nkf_char_unicode_p(c1)){
c2 = c1 & VALUE_MASK;
- if (x0212_f && ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
+ if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* eucJP-ms UDC */
c1 &= 0xFFF;
c2 = c1 / 94;
}
}
}
-#endif
+
if (c2 == EOF) {
(*o_putc)(EOF);
} else if (c2 == 0) {
output_encoding = nkf_enc_from_index(ISO_2022_JP);
continue;
case 'e': /* AT&T EUC output */
- output_encoding = nkf_enc_from_index(EUC_JP);
+ output_encoding = nkf_enc_from_index(EUCJP_NKF);
continue;
case 's': /* SJIS output */
output_encoding = nkf_enc_from_index(WINDOWS_31J);
input_encoding = nkf_enc_from_index(ISO_2022_JP);
continue;
case 'E': /* EUC-JP input */
- input_encoding = nkf_enc_from_index(EUC_JP);
+ input_encoding = nkf_enc_from_index(EUCJP_NKF);
continue;
case 'S': /* Windows-31J input */
input_encoding = nkf_enc_from_index(WINDOWS_31J);
#ifdef EXEC_IO
exec_f = exec_f_back;
#endif
-#ifdef X0212_ENABLE
x0212_f = x0212_f_back;
-#endif
x0213_f = x0213_f_back;
}
#
# nkf test program for nkf-2
#
-# $Id: nkf_test.pl,v 1.28 2008/02/06 20:46:39 naruse Exp $
+# $Id: nkf_test.pl,v 1.29 2008/02/07 19:25:29 naruse Exp $
#
# Shinji KONO <kono@ie.u-ryukyu.ac.jp>
# Sun Aug 18 12:25:40 JST 1996
printf "%-40s", "test_data/no-cp932inv";
&test("$nkf -sE --no-cp932",$example{'test_data/cp932.ans'},$example{'test_data/no-cp932inv.ans'});
-# ISO-2022-JP-MS
-$example{'iso2022jpms_sjis'} = "\xA1\xDF\xF0\xA0\xF4\xFC\xF5\x40\xF9\xFC";
-$example{'iso2022jpms_euc'} = "\x8e\xa1\x8e\xdf\xf6\xa2\xfe\xfe\x8f\xf5\xa1\x8f\xfe\xfe";
-$example{'iso2022jpms_jis'} = "\x1b\x28\x49\x21\x5f\x1b\x24\x28\x3f\x22\x22\x2a\x7e\x2b\x21\x34\x7e\x1b\x28\x42";
-
-# printf "%-40s", "ISO-2022-JP-MS to CP932";
-# &test("$nkf --ic=ISO-2022-JP-MS --oc=CP932",$example{'iso2022jpms_jis'},$example{'iso2022jpms_sjis'});
-#
-# printf "%-40s", "CP932 to ISO-2022-JP-MS";
-# &test("$nkf --ic=CP932 --oc=ISO-2022-JP-MS",$example{'iso2022jpms_sjis'},$example{'iso2022jpms_jis'});
-#
-# printf "%-40s", "ISO-2022-JP-MS to eucJP-ms";
-# &test("$nkf --ic=ISO-2022-JP-MS --oc=eucJP-ms",$example{'iso2022jpms_jis'},$example{'iso2022jpms_euc'});
-#
-# printf "%-40s", "eucJP-ms to ISO-2022-JP-MS";
-# &test("$nkf --ic=eucJP-ms --oc=ISO-2022-JP-MS",$example{'iso2022jpms_euc'},$example{'iso2022jpms_jis'});
# JIS X 0212
$example{'jisx0212_euc'} = "\x8F\xA2\xAF\x8F\xED\xE3";
$example{'jisx0212_jis'} = "\x1b\x24\x28\x44\x22\x2f\x6d\x63\x1b\x28\x42";
2HJ^O_<_5S_WTJ/[YCZ&AC_[V
eofeof
-$example{'jisx0213_jis'} = unpack('u',<<'eofeof');
+$example{'jisx0213_jis2000'} = unpack('u',<<'eofeof');
;&R0H3R(O+WU/54]]="A^>1LD*%`A(7YV&RA"
eofeof
+$example{'jisx0213_jis2004'} = unpack('u',<<'eofeof');
+;&R0H42(O+WU/54]]="A^>1LD*%`A(7YV&RA"
+eofeof
printf "%-40s", "Shift_JISX0213 to EUC-JISX0213";
&test("$nkf --ic=Shift_JISX0213 --oc=EUC-JISX0213",$example{'jisx0213_sjis'},$example{'jisx0213_euc'});
&test("$nkf --ic=EUC-JISX0213 --oc=Shift_JISX0213",$example{'jisx0213_euc'},$example{'jisx0213_sjis'});
printf "%-40s", "ISO-2022-JP-3 to EUC-JISX0213";
- &test("$nkf --ic=ISO-2022-JP-3 --oc=EUC-JISX0213",$example{'jisx0213_jis'},$example{'jisx0213_euc'});
+ &test("$nkf --ic=ISO-2022-JP-3 --oc=EUC-JISX0213",$example{'jisx0213_jis2000'},$example{'jisx0213_euc'});
+
+printf "%-40s", "ISO-2022-JP-2004 to EUC-JISX0213";
+ &test("$nkf --ic=ISO-2022-JP-2004 --oc=EUC-JISX0213",$example{'jisx0213_jis2004'},$example{'jisx0213_euc'});
-printf "%-40s", "EUC-JISX0213 to ISO-2022-JP-3";
- &test("$nkf --ic=EUC-JISX0213 --oc=ISO-2022-JP-3",$example{'jisx0213_euc'},$example{'jisx0213_jis'});
+printf "%-40s", "EUC-JISX0213 to ISO-2022-JP-2004";
+ &test("$nkf --ic=EUC-JISX0213 --oc=ISO-2022-JP-2004",$example{'jisx0213_euc'},$example{'jisx0213_jis2004'});
# test_data/no_best_fit_chars
$example{'test_data/no_best_fit_chars'} = unpack('u',<<'eofeof');
/*
* utf8tbl.c - Conversion Table for nkf
*
- * $Id: utf8tbl.c,v 1.22 2008/01/23 09:10:25 naruse Exp $
+ * $Id: utf8tbl.c,v 1.23 2008/02/07 19:25:29 naruse Exp $
*/
#include "config.h"
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0x0220, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178,
- 0x212F, 0x027D, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
+ 0x00A0, 0xA242, 0x2171, 0x2172, 0xA270, 0x5C, 0xA243, 0x2178,
+ 0x212F, 0x00FD, 0xA26C, 0, 0x224C, 0, 0xA26E, 0xA234,
0x216B, 0x215E, 0, 0, 0x212D, 0, 0x2279, 0,
0xA231, 0, 0xA26B, 0, 0, 0, 0, 0xA244,
};
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0x2B7B, 0,
0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0x2B7D, 0x027E, 0, 0, 0, 0, 0,
+ 0, 0x2B7D, 0x00FE, 0, 0, 0, 0, 0,
0, 0, 0, 0x2272, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,