X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=blobdiff_plain;f=nkf.c;h=505d19290fc58126e7d98b611bbe287ab318b356;hp=43172a4ab3e4684c2b30e23b728cc2bc282eec13;hb=477df6ae857330396b089025d986f949f27bc5ac;hpb=6a0732c14b454e215bf8fc9e77196eb60ce9b28a diff --git a/nkf.c b/nkf.c index 43172a4..505d192 100644 --- a/nkf.c +++ b/nkf.c @@ -30,9 +30,9 @@ * 現在、nkf は SorceForge にてメンテナンスが続けられています。 * http://sourceforge.jp/projects/nkf/ ***********************************************************************/ -/* $Id: nkf.c,v 1.151 2007/12/06 20:13:58 naruse Exp $ */ +/* $Id: nkf.c,v 1.159 2007/12/23 07:55:20 naruse Exp $ */ #define NKF_VERSION "2.0.8" -#define NKF_RELEASE_DATE "2007-12-07" +#define NKF_RELEASE_DATE "2007-12-22" #define COPY_RIGHT \ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \ "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon" @@ -40,6 +40,14 @@ #include "config.h" #include "utf8tbl.h" +#if defined(DEFAULT_CODE_JIS) +#elif defined(DEFAULT_CODE_SJIS) +#elif defined(DEFAULT_CODE_EUC) +#elif defined(DEFAULT_CODE_UTF8) +#else +#define DEFAULT_CODE_JIS 1 +#endif + #ifndef MIME_DECODE_DEFAULT #define MIME_DECODE_DEFAULT STRICT_MIME #endif @@ -47,6 +55,29 @@ #define X0201_DEFAULT TRUE #endif +#if DEFAULT_NEWLINE == 0x0D0A +#define PUT_NEWLINE(func) do {\ + func(0x0D);\ + func(0x0A);\ +} while (0) +#define OCONV_NEWLINE(func) do {\ + func(0, 0x0D);\ + func(0, 0x0A);\ +} while (0) +#elif DEFAULT_NEWLINE == 0x0D +#define PUT_NEWLINE(func) func(0x0D) +#define OCONV_NEWLINE(func) func(0, 0x0D) +#else +#define DEFAULT_NEWLINE 0x0A +#define PUT_NEWLINE(func) func(0x0A) +#define OCONV_NEWLINE(func) func(0, 0x0A) +#endif +#ifdef HELP_OUTPUT_STDERR +#define HELP_OUTPUT stderr +#else +#define HELP_OUTPUT stdout +#endif + #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS) #define MSDOS #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__) @@ -149,41 +180,29 @@ void djgpp_setbinmode(FILE *fp) /* state of output_mode and input_mode c2 0 means ASCII - X0201 - ISO8859_1 - X0208 + JIS_X_0201 + ISO_8859_1 + JIS_X_0208 EOF all termination c1 32bit data */ -#define ASCII 0 -#define X0208 1 -#define X0201 2 -#define ISO8859_1 8 -#define X0212 0x2844 -#define X0213_1 0x284F -#define X0213_2 0x2850 - /* Input Assumption */ #define JIS_INPUT 4 #define EUC_INPUT 16 #define SJIS_INPUT 5 #define LATIN1_INPUT 6 +#define UTF8_INPUT 13 +#define UTF16_INPUT 1015 +#define UTF32_INPUT 1017 + #define FIXED_MIME 7 #define STRICT_MIME 8 /* MIME ENCODE */ -#define ISO2022JP 9 -#define JAPANESE_EUC 10 -#define SHIFT_JIS 11 - -#define UTF8 12 -#define UTF8_INPUT 13 -#define UTF16_INPUT 1015 -#define UTF32_INPUT 1017 /* byte order */ @@ -209,6 +228,180 @@ void djgpp_setbinmode(FILE *fp) #define SS3 0x8f #define CRLF 0x0D0A + +/* encodings */ + +enum nkf_encodings { + ASCII, + ISO_8859_1, + ISO_2022_JP, + CP50220, + CP50221, + CP50222, + ISO_2022_JP_1, + ISO_2022_JP_3, + SHIFT_JIS, + WINDOWS_31J, + CP10001, + EUC_JP, + CP51932, + EUCJP_MS, + EUCJP_ASCII, + SHIFT_JISX0213, + SHIFT_JIS_2004, + EUC_JISX0213, + EUC_JIS_2004, + UTF_8, + UTF_8N, + UTF_8_BOM, + UTF8_MAC, + UTF_16, + UTF_16BE, + UTF_16BE_BOM, + UTF_16LE, + UTF_16LE_BOM, + UTF_32, + UTF_32BE, + UTF_32BE_BOM, + UTF_32LE, + UTF_32LE_BOM, + JIS_X_0201=0x1000, + JIS_X_0208, + JIS_X_0212, + JIS_X_0213_1, + JIS_X_0213_2, + BINARY +}; + +nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0); +void j_oconv(nkf_char c2, nkf_char c1); +void s_oconv(nkf_char c2, nkf_char c1); +void e_oconv(nkf_char c2, nkf_char c1); +void w_oconv(nkf_char c2, nkf_char c1); +void w_oconv16(nkf_char c2, nkf_char c1); +void w_oconv32(nkf_char c2, nkf_char c1); + +typedef struct { + char *name; + nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); + void (*oconv_func)(nkf_char c2, nkf_char c1); +} nkf_native_encoding; + +nkf_native_encoding NkfEncodingASCII = { "US_ASCII", e_iconv, e_oconv }; +nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv }; +nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv }; +nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv }; +nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv }; +nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 }; +nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; + +typedef struct { + int id; + char *name; + nkf_native_encoding *based_encoding; +} nkf_encoding; +nkf_encoding nkf_encoding_table[] = { + {ASCII, "ASCII", &NkfEncodingASCII}, + {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII}, + {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingASCII}, + {CP50220, "CP50220", &NkfEncodingISO_2022_JP}, + {CP50221, "CP50221", &NkfEncodingISO_2022_JP}, + {CP50222, "CP50222", &NkfEncodingISO_2022_JP}, + {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP}, + {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP}, + {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS}, + {WINDOWS_31J, "WINDOWS-31J", &NkfEncodingShift_JIS}, + {CP10001, "CP10001", &NkfEncodingShift_JIS}, + {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP}, + {CP51932, "CP51932", &NkfEncodingEUC_JP}, + {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP}, + {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP}, + {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS}, + {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS}, + {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP}, + {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP}, + {UTF_8, "UTF-8", &NkfEncodingUTF_8}, + {UTF_8N, "UTF-8N", &NkfEncodingUTF_8}, + {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8}, + {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8}, + {UTF_16, "UTF-16", &NkfEncodingUTF_16}, + {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16}, + {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16}, + {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16}, + {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16}, + {UTF_32, "UTF-32", &NkfEncodingUTF_32}, + {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32}, + {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32}, + {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32}, + {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32}, + {BINARY, "BINARY", &NkfEncodingASCII}, + {-1, NULL, NULL} +}; +#define NKF_ENCODING_TABLE_SIZE 34 +struct { + const char *name; + const int id; +} encoding_name_to_id_table[] = { + {"ASCII", ASCII}, + {"ISO-2022-JP", ISO_2022_JP}, + {"X-ISO2022JP-CP932", CP50220}, + {"CP50220", CP50220}, + {"CP50221", CP50221}, + {"CP50222", CP50222}, + {"ISO-2022-JP-1", ISO_2022_JP_1}, + {"ISO-2022-JP-3", ISO_2022_JP_3}, + {"SHIFT_JIS", SHIFT_JIS}, + {"SJIS", SHIFT_JIS}, + {"WINDOWS-31J", WINDOWS_31J}, + {"CSWINDOWS31J", WINDOWS_31J}, + {"CP932", WINDOWS_31J}, + {"MS932", WINDOWS_31J}, + {"CP10001", CP10001}, + {"EUCJP", EUC_JP}, + {"EUC-JP", EUC_JP}, + {"CP51932", CP51932}, + {"EUC-JP-MS", EUCJP_MS}, + {"EUCJP-MS", EUCJP_MS}, + {"EUCJPMS", EUCJP_MS}, + {"EUC-JP-ASCII", EUCJP_ASCII}, + {"EUCJP-ASCII", EUCJP_ASCII}, + {"SHIFT_JISX0213", SHIFT_JISX0213}, + {"SHIFT_JIS-2004", SHIFT_JIS_2004}, + {"EUC-JISX0213", EUC_JISX0213}, + {"EUC-JIS-2004", EUC_JIS_2004}, + {"UTF-8", UTF_8}, + {"UTF-8N", UTF_8N}, + {"UTF-8-BOM", UTF_8_BOM}, + {"UTF8-MAC", UTF8_MAC}, + {"UTF-8-MAC", UTF8_MAC}, + {"UTF-16", UTF_16}, + {"UTF-16BE", UTF_16BE}, + {"UTF-16BE-BOM", UTF_16BE_BOM}, + {"UTF-16LE", UTF_16LE}, + {"UTF-16LE-BOM", UTF_16LE_BOM}, + {"UTF-32", UTF_32}, + {"UTF-32BE", UTF_32BE}, + {"UTF-32BE-BOM", UTF_32BE_BOM}, + {"UTF-32LE", UTF_32LE}, + {"UTF-32LE-BOM", UTF_32LE_BOM}, + {"BINARY", BINARY}, + {NULL, -1} +}; +#if defined(DEFAULT_CODE_JIS) +#define DEFAULT_ENCODING ISO_2022_JP +#elif defined(DEFAULT_CODE_SJIS) +#define DEFAULT_ENCODING SHIFT_JIS +#elif defined(DEFAULT_CODE_EUC) +#define DEFAULT_ENCODING EUC_JP +#elif defined(DEFAULT_CODE_UTF8) +#define DEFAULT_ENCODING UTF_8 +#endif + + #define is_alnum(c) \ (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) @@ -283,10 +476,8 @@ struct input_code{ }; static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */ +static nkf_encoding *output_encoding; -#ifndef PERL_XS -static const char *CopyRight = COPY_RIGHT; -#endif #if !defined(PERL_XS) && !defined(WIN32DLL) static nkf_char noconvert(FILE *f); #endif @@ -295,9 +486,7 @@ static nkf_char kanji_convert(FILE *f); static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1); static nkf_char push_hold_buf(nkf_char c2); static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)); -static nkf_char s_iconv(nkf_char c2,nkf_char c1,nkf_char c0); static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1); -static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0); #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) /* UCS Mapping * 0: Shift_JIS, eucJP-ascii @@ -326,9 +515,6 @@ static void encode_fallback_perl(nkf_char c); static void encode_fallback_subchar(nkf_char c); static void (*encode_fallback)(nkf_char c) = NULL; static nkf_char w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1); -static nkf_char w_iconv(nkf_char c2,nkf_char c1,nkf_char c0); -static nkf_char w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0); -static nkf_char w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0); static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1); static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1); static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0); @@ -340,14 +526,8 @@ static void w_status(struct input_code *, nkf_char); static int output_bom_f = FALSE; static int output_endian = ENDIAN_BIG; static nkf_char e2w_conv(nkf_char c2,nkf_char c1); -static void w_oconv(nkf_char c2,nkf_char c1); -static void w_oconv16(nkf_char c2,nkf_char c1); -static void w_oconv32(nkf_char c2,nkf_char c1); #endif -static void e_oconv(nkf_char c2,nkf_char c1); static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1); -static void s_oconv(nkf_char c2,nkf_char c1); -static void j_oconv(nkf_char c2,nkf_char c1); static void fold_conv(nkf_char c2,nkf_char c1); static void nl_conv(nkf_char c2,nkf_char c1); static void z_conv(nkf_char c2,nkf_char c1); @@ -719,6 +899,57 @@ static int end_check; nkf_char std_gc_buf[STD_GC_BUFSIZE]; nkf_char std_gc_ndx; +char* nkf_strcpy(const char *str) +{ + char* result = malloc(strlen(str) + 1); + if (!result){ + perror(str); + return ""; + } + strcpy(result, str); + return result; +} + +static void nkf_str_upcase(const char *src, char *dest, size_t length) +{ + int i = 0; + for (; i < length && dest[i]; i++) { + dest[i] = nkf_toupper(src[i]); + } + dest[i] = 0; +} + +static nkf_encoding *nkf_enc_from_index(int idx) +{ + if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) { + return 0; + } + return &nkf_encoding_table[idx]; +} + +static int nkf_enc_find_index(const char *name) +{ + int i, index = -1; + for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) { + if (strcmp(name, encoding_name_to_id_table[i].name) == 0) { + return encoding_name_to_id_table[i].id; + } + } + return index; +} + +static nkf_encoding *nkf_enc_find(const char *name) +{ + int idx = -1; + idx = nkf_enc_find_index(name); + if (idx < 0) return 0; + return nkf_enc_from_index(idx); +} + +#define nkf_enc_name(enc) (enc)->name +#define nkf_enc_to_index(enc) (enc)->id +#define nkf_enc_to_base_encoding(enc) (enc)->based_encoding + #ifdef WIN32DLL #include "nkf32dll.c" #elif defined(PERL_XS) @@ -830,8 +1061,7 @@ int main(int argc, char **argv) iconv_for_check = 0; #endif if ((fin = fopen((origfname = *argv++), "r")) == NULL) { - perror(*--argv); - *argv++; + perror(*(argv-1)); is_argument_error = TRUE; continue; } else { @@ -1064,7 +1294,7 @@ static const struct { {"katakana","h2"}, {"katakana-hiragana","h3"}, {"guess=", ""}, - {"guess", "g"}, + {"guess", "g1"}, {"cp932", ""}, {"no-cp932", ""}, #ifdef X0212_ENABLE @@ -1126,6 +1356,7 @@ void options(unsigned char *cp) unsigned char *p; unsigned char *cp_back = NULL; char codeset[32]; + nkf_encoding *enc; if (option_mode==1) return; @@ -1162,16 +1393,15 @@ void options(unsigned char *cp) cp = (unsigned char *)long_option[i].alias; }else{ if (strcmp(long_option[i].name, "ic=") == 0){ - for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){ - codeset[i] = nkf_toupper(p[i]); - } - codeset[i] = 0; - if(strcmp(codeset, "ISO-2022-JP") == 0){ + nkf_str_upcase((char *)p, codeset, 32); + enc = nkf_enc_find(codeset); + switch (nkf_enc_to_index(enc)) { + case ISO_2022_JP: input_f = JIS_INPUT; - }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 || - strcmp(codeset, "CP50220") == 0 || - strcmp(codeset, "CP50221") == 0 || - strcmp(codeset, "CP50222") == 0){ + break; + case CP50220: + case CP50221: + case CP50222: input_f = JIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1179,23 +1409,24 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){ + break; + case ISO_2022_JP_1: input_f = JIS_INPUT; #ifdef X0212_ENABLE x0212_f = TRUE; #endif - }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){ + break; + case ISO_2022_JP_3: input_f = JIS_INPUT; #ifdef X0212_ENABLE x0212_f = TRUE; #endif x0213_f = TRUE; - }else if(strcmp(codeset, "SHIFT_JIS") == 0){ + break; + case SHIFT_JIS: input_f = SJIS_INPUT; - }else if(strcmp(codeset, "WINDOWS-31J") == 0 || - strcmp(codeset, "CSWINDOWS31J") == 0 || - strcmp(codeset, "CP932") == 0 || - strcmp(codeset, "MS932") == 0){ + break; + case WINDOWS_31J: input_f = SJIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1203,7 +1434,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "CP10001") == 0){ + break; + case CP10001: input_f = SJIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1211,10 +1443,11 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP10001; #endif - }else if(strcmp(codeset, "EUCJP") == 0 || - strcmp(codeset, "EUC-JP") == 0){ + break; + case EUC_JP: input_f = EUC_INPUT; - }else if(strcmp(codeset, "CP51932") == 0){ + break; + case CP51932: input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1222,9 +1455,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "EUC-JP-MS") == 0 || - strcmp(codeset, "EUCJP-MS") == 0 || - strcmp(codeset, "EUCJPMS") == 0){ + break; + case EUCJP_MS: input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; @@ -1232,8 +1464,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_MS; #endif - }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 || - strcmp(codeset, "EUCJP-ASCII") == 0){ + break; + case EUCJP_ASCII: input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; @@ -1241,82 +1473,83 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_ASCII; #endif - }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 || - strcmp(codeset, "SHIFT_JIS-2004") == 0){ + break; + case SHIFT_JISX0213: + case SHIFT_JIS_2004: input_f = SJIS_INPUT; x0213_f = TRUE; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif - }else if(strcmp(codeset, "EUC-JISX0213") == 0 || - strcmp(codeset, "EUC-JIS-2004") == 0){ + break; + case EUC_JISX0213: + case EUC_JIS_2004: input_f = EUC_INPUT; x0213_f = TRUE; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif + break; #ifdef UTF8_INPUT_ENABLE - }else if(strcmp(codeset, "UTF-8") == 0 || - strcmp(codeset, "UTF-8N") == 0 || - strcmp(codeset, "UTF-8-BOM") == 0){ + case UTF_8: + case UTF_8N: + case UTF_8_BOM: input_f = UTF8_INPUT; + break; #ifdef UNICODE_NORMALIZATION - }else if(strcmp(codeset, "UTF8-MAC") == 0 || - strcmp(codeset, "UTF-8-MAC") == 0){ + case UTF8_MAC: input_f = UTF8_INPUT; nfc_f = TRUE; + break; #endif - }else if(strcmp(codeset, "UTF-16") == 0 || - strcmp(codeset, "UTF-16BE") == 0 || - strcmp(codeset, "UTF-16BE-BOM") == 0){ + case UTF_16: + case UTF_16BE: + case UTF_16BE_BOM: input_f = UTF16_INPUT; input_endian = ENDIAN_BIG; - }else if(strcmp(codeset, "UTF-16LE") == 0 || - strcmp(codeset, "UTF-16LE-BOM") == 0){ + break; + case UTF_16LE: + case UTF_16LE_BOM: input_f = UTF16_INPUT; input_endian = ENDIAN_LITTLE; - }else if(strcmp(codeset, "UTF-32") == 0 || - strcmp(codeset, "UTF-32BE") == 0 || - strcmp(codeset, "UTF-32BE-BOM") == 0){ + break; + case UTF_32: + case UTF_32BE: + case UTF_32BE_BOM: input_f = UTF32_INPUT; input_endian = ENDIAN_BIG; - }else if(strcmp(codeset, "UTF-32LE") == 0 || - strcmp(codeset, "UTF-32LE-BOM") == 0){ + break; + case UTF_32LE: + case UTF_32LE_BOM: input_f = UTF32_INPUT; input_endian = ENDIAN_LITTLE; + break; #endif - } else { + default: fprintf(stderr, "unknown input encoding: %s\n", codeset); + break; } continue; } if (strcmp(long_option[i].name, "oc=") == 0){ x0201_f = FALSE; - for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){ - codeset[i] = nkf_toupper(p[i]); - } - codeset[i] = 0; - if(strcmp(codeset, "ISO-2022-JP") == 0){ - output_conv = j_oconv; - }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){ + nkf_str_upcase((char *)p, codeset, 32); + output_encoding = nkf_enc_find(codeset); + switch (nkf_enc_to_index(output_encoding)) { + case ISO_2022_JP: output_conv = j_oconv; - no_cp932ext_f = TRUE; -#ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; -#endif -#ifdef UTF8_OUTPUT_ENABLE - ms_ucs_map_f = UCS_MAP_CP932; -#endif - }else if(strcmp(codeset, "CP50220") == 0){ - output_conv = j_oconv; - x0201_f = TRUE; + break; + case CP50220: + output_conv = j_oconv; + x0201_f = TRUE; #ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; + cp932inv_f = FALSE; #endif #ifdef UTF8_OUTPUT_ENABLE - ms_ucs_map_f = UCS_MAP_CP932; + ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "CP50221") == 0){ + break; + case CP50221: output_conv = j_oconv; #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; @@ -1324,7 +1557,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){ + break; + case ISO_2022_JP_1: output_conv = j_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1332,7 +1566,8 @@ void options(unsigned char *cp) #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif - }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){ + break; + case ISO_2022_JP_3: output_conv = j_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1341,25 +1576,26 @@ void options(unsigned char *cp) #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif - }else if(strcmp(codeset, "SHIFT_JIS") == 0){ + break; + case SHIFT_JIS: output_conv = s_oconv; - }else if(strcmp(codeset, "WINDOWS-31J") == 0 || - strcmp(codeset, "CSWINDOWS31J") == 0 || - strcmp(codeset, "CP932") == 0 || - strcmp(codeset, "MS932") == 0){ + break; + case WINDOWS_31J: output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "CP10001") == 0){ + break; + case CP10001: output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP10001; #endif - }else if(strcmp(codeset, "EUCJP") == 0 || - strcmp(codeset, "EUC-JP") == 0){ + break; + case EUC_JP: output_conv = e_oconv; - }else if(strcmp(codeset, "CP51932") == 0){ + break; + case CP51932: output_conv = e_oconv; #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; @@ -1367,9 +1603,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "EUC-JP-MS") == 0 || - strcmp(codeset, "EUCJP-MS") == 0 || - strcmp(codeset, "EUCJPMS") == 0){ + break; + case EUCJP_MS: output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1377,8 +1612,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_MS; #endif - }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 || - strcmp(codeset, "EUCJP-ASCII") == 0){ + break; + case EUCJP_ASCII: output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1386,15 +1621,17 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_ASCII; #endif - }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 || - strcmp(codeset, "SHIFT_JIS-2004") == 0){ - output_conv = s_oconv; - x0213_f = TRUE; + break; + case SHIFT_JISX0213: + case SHIFT_JIS_2004: + output_conv = s_oconv; + x0213_f = TRUE; #ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; + cp932inv_f = FALSE; #endif - }else if(strcmp(codeset, "EUC-JISX0213") == 0 || - strcmp(codeset, "EUC-JIS-2004") == 0){ + break; + case EUC_JISX0213: + case EUC_JIS_2004: output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1403,43 +1640,54 @@ void options(unsigned char *cp) #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif + break; #ifdef UTF8_OUTPUT_ENABLE - }else if(strcmp(codeset, "UTF-8") == 0){ - output_conv = w_oconv; - }else if(strcmp(codeset, "UTF-8N") == 0){ + case UTF_8: + case UTF_8N: output_conv = w_oconv; - }else if(strcmp(codeset, "UTF-8-BOM") == 0){ + break; + case UTF_8_BOM: output_conv = w_oconv; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-16BE") == 0){ + break; + case UTF_16BE: output_conv = w_oconv16; - }else if(strcmp(codeset, "UTF-16") == 0 || - strcmp(codeset, "UTF-16BE-BOM") == 0){ + break; + case UTF_16: + case UTF_16BE_BOM: output_conv = w_oconv16; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-16LE") == 0){ + break; + case UTF_16LE: output_conv = w_oconv16; output_endian = ENDIAN_LITTLE; - }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){ + break; + case UTF_16LE_BOM: output_conv = w_oconv16; output_endian = ENDIAN_LITTLE; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-32") == 0 || - strcmp(codeset, "UTF-32BE") == 0){ + break; + case UTF_32: + case UTF_32BE: output_conv = w_oconv32; - }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){ + break; + case UTF_32BE_BOM: output_conv = w_oconv32; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-32LE") == 0){ + break; + case UTF_32LE: output_conv = w_oconv32; output_endian = ENDIAN_LITTLE; - }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){ + break; + case UTF_32LE_BOM: output_conv = w_oconv32; output_endian = ENDIAN_LITTLE; output_bom_f = TRUE; + break; #endif - } else { + default: fprintf(stderr, "unknown output encoding: %s\n", codeset); + break; } continue; } @@ -1646,7 +1894,7 @@ void options(unsigned char *cp) if (*cp=='1') { /* alias of -t */ nop_f = TRUE; - *cp++; + *cp += 1; } else if (*cp=='2') { /* * -t with put/get @@ -1655,20 +1903,23 @@ void options(unsigned char *cp) * */ nop_f = 2; - *cp++; + *cp += 1; } else nop_f = TRUE; continue; case 'j': /* JIS output */ case 'n': output_conv = j_oconv; + output_encoding = nkf_enc_from_index(ISO_2022_JP); continue; case 'e': /* AT&T EUC output */ output_conv = e_oconv; cp932inv_f = FALSE; + output_encoding = nkf_enc_from_index(EUC_JP); continue; case 's': /* SJIS output */ output_conv = s_oconv; + output_encoding = nkf_enc_from_index(SHIFT_JIS); continue; case 'l': /* ISO8859 Latin-1 support, no conversion */ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ @@ -1716,16 +1967,22 @@ void options(unsigned char *cp) output_conv = w_oconv; cp++; if (cp[0] == '0'){ cp++; + output_encoding = nkf_enc_from_index(UTF_8N); } else { output_bom_f = TRUE; + output_encoding = nkf_enc_from_index(UTF_8_BOM); } } else { + int enc_idx; if ('1'== cp[0] && '6'==cp[1]) { output_conv = w_oconv16; cp+=2; + enc_idx = UTF_16; } else if ('3'== cp[0] && '2'==cp[1]) { output_conv = w_oconv32; cp+=2; + enc_idx = UTF_32; } else { output_conv = w_oconv; + output_encoding = nkf_enc_from_index(UTF_8); continue; } if (cp[0]=='L') { @@ -1734,13 +1991,21 @@ void options(unsigned char *cp) } else if (cp[0] == 'B') { cp++; } else { + output_encoding = nkf_enc_from_index(enc_idx); continue; } if (cp[0] == '0'){ cp++; + enc_idx = enc_idx == UTF_16 + ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE) + : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE); } else { output_bom_f = TRUE; + enc_idx = enc_idx == UTF_16 + ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM) + : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM); } + output_encoding = nkf_enc_from_index(enc_idx); } continue; #endif @@ -2660,7 +2925,7 @@ nkf_char kanji_convert(FILE *f) NEXT; } else { /* estab_f==TRUE */ if (iso8859_f) { - c2 = ISO8859_1; + c2 = ISO_8859_1; c1 &= 0x7f; SEND; } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) { @@ -2669,7 +2934,7 @@ nkf_char kanji_convert(FILE *f) (*oconv)(GETA1, GETA2); NEXT; } else { - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; SEND; } @@ -2682,7 +2947,7 @@ nkf_char kanji_convert(FILE *f) (*oconv)(GETA1, GETA2); NEXT; } else { - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; SEND; } @@ -2692,7 +2957,7 @@ nkf_char kanji_convert(FILE *f) } else if (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE)) { /* CP10001 */ - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; SEND; } else { @@ -2706,7 +2971,7 @@ nkf_char kanji_convert(FILE *f) if (shift_mode) { /* output 1 shifted byte */ if (iso8859_f) { - c2 = ISO8859_1; + c2 = ISO_8859_1; SEND; } else if (SP <= c1 && c1 < (0xe0&0x7f)){ /* output 1 shifted byte */ @@ -2714,15 +2979,15 @@ nkf_char kanji_convert(FILE *f) (*oconv)(GETA1, GETA2); NEXT; } else { - c2 = X0201; + c2 = JIS_X_0201; SEND; } } else { /* look like bogus code */ NEXT; } - } else if (input_mode == X0208 || input_mode == X0212 || - input_mode == X0213_1 || input_mode == X0213_2) { + } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 || + input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) { /* in case of Kanji shifted */ c2 = c1; NEXT; @@ -2770,7 +3035,7 @@ nkf_char kanji_convert(FILE *f) LAST; } else if (c1 == '@'|| c1 == 'B') { /* This is kanji introduction */ - input_mode = X0208; + input_mode = JIS_X_0208; shift_mode = FALSE; set_input_codename("ISO-2022-JP"); #ifdef CHECK_OPTION @@ -2787,21 +3052,21 @@ nkf_char kanji_convert(FILE *f) LAST; } else if (c1 == '@'|| c1 == 'B') { /* This is kanji introduction */ - input_mode = X0208; + input_mode = JIS_X_0208; shift_mode = FALSE; NEXT; #ifdef X0212_ENABLE } else if (c1 == 'D'){ - input_mode = X0212; + input_mode = JIS_X_0212; shift_mode = FALSE; NEXT; #endif /* X0212_ENABLE */ - } else if (c1 == (X0213_1&0x7F)){ - input_mode = X0213_1; + } else if (c1 == 0x4F){ + input_mode = JIS_X_0213_1; shift_mode = FALSE; NEXT; - } else if (c1 == (X0213_2&0x7F)){ - input_mode = X0213_2; + } else if (c1 == 0x50){ + input_mode = JIS_X_0213_2; shift_mode = FALSE; NEXT; } else { @@ -2814,7 +3079,7 @@ nkf_char kanji_convert(FILE *f) } } else if (broken_f&0x2) { /* accept any ESC-(-x as broken code ... */ - input_mode = X0208; + input_mode = JIS_X_0208; shift_mode = FALSE; NEXT; } else { @@ -2832,7 +3097,7 @@ nkf_char kanji_convert(FILE *f) } else { if (c1 == 'I') { /* This is X0201 kana introduction */ - input_mode = X0201; shift_mode = X0201; + input_mode = JIS_X_0201; shift_mode = JIS_X_0201; NEXT; } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') { /* This is X0208 kanji introduction */ @@ -2853,7 +3118,7 @@ nkf_char kanji_convert(FILE *f) c3 = (*i_getc)(f); /* skip SS2 */ if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){ c1 = c3; - c2 = X0201; + c2 = JIS_X_0201; SEND; }else{ (*i_ungetc)(c3, f); @@ -2937,7 +3202,7 @@ nkf_char kanji_convert(FILE *f) SEND; } } - } else if (c1 == DEL && input_mode == X0208) { + } else if (c1 == DEL && input_mode == JIS_X_0208) { /* CP5022x */ c2 = c1; NEXT; @@ -2968,8 +3233,8 @@ nkf_char kanji_convert(FILE *f) break; } break; - case X0208: - case X0213_1: + case JIS_X_0208: + case JIS_X_0213_1: if (ms_ucs_map_f && 0x7F <= c2 && c2 <= 0x92 && 0x21 <= c1 && c1 <= 0x7E) { @@ -2981,11 +3246,11 @@ nkf_char kanji_convert(FILE *f) (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ break; #ifdef X0212_ENABLE - case X0212: + case JIS_X_0212: (*oconv)(PREFIX_EUCG3 | c2, c1); break; #endif /* X0212_ENABLE */ - case X0213_2: + case JIS_X_0213_2: (*oconv)(PREFIX_EUCG3 | c2, c1); break; default: @@ -3081,7 +3346,7 @@ h_conv(FILE *f, nkf_char c2, nkf_char c1) (*iconv)(0, c2, 0); continue; }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){ - (*iconv)(X0201, c2, 0); + (*iconv)(JIS_X_0201, c2, 0); continue; } if (hold_index < hold_count){ @@ -3212,7 +3477,7 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0) { - if (c2 == X0201) { + if (c2 == JIS_X_0201) { c1 &= 0x7f; } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) { /* NOP */ @@ -3231,7 +3496,7 @@ nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0) nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) { - if (c2 == X0201) { + if (c2 == JIS_X_0201) { c1 &= 0x7f; #ifdef X0212_ENABLE }else if (c2 == 0x8f){ @@ -3260,7 +3525,7 @@ nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) } #endif /* X0212_ENABLE */ } else if (c2 == SSO){ - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) { /* NOP */ @@ -3678,7 +3943,7 @@ nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *p c2 &= 0x7f; c2 |= PREFIX_EUCG3; } - if (c2 == SO) c2 = X0201; + if (c2 == SO) c2 = JIS_X_0201; c1 = val & 0x7f; if (p2) *p2 = c2; if (p1) *p1 = c1; @@ -3778,7 +4043,7 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1) { const unsigned short *p; - if (c2 == X0201) { + if (c2 == JIS_X_0201) { if (ms_ucs_map_f == UCS_MAP_CP10001) { switch (c1) { case 0x20: @@ -3859,11 +4124,11 @@ void w_oconv(nkf_char c2, nkf_char c1) if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); - } else if (c2 == ISO8859_1) { - output_mode = UTF8; + } else if (c2 == ISO_8859_1) { + output_mode = UTF_8; (*o_putc)(c1 | 0x080); } else { - output_mode = UTF8; + output_mode = UTF_8; val = e2w_conv(c2, c1); if (val){ w16w_conv(val, &c2, &c1, &c0); @@ -3894,7 +4159,7 @@ void w_oconv16(nkf_char c2, nkf_char c1) return; } - if (c2 == ISO8859_1) { + if (c2 == ISO_8859_1) { c2 = 0; c1 |= 0x80; #ifdef NUMCHAR_OPTION @@ -3959,7 +4224,7 @@ void w_oconv32(nkf_char c2, nkf_char c1) return; } - if (c2 == ISO8859_1) { + if (c2 == ISO_8859_1) { c1 |= 0x80; #ifdef NUMCHAR_OPTION } else if (c2 == 0 && is_unicode_capsule(c1)) { @@ -4018,15 +4283,15 @@ void e_oconv(nkf_char c2, nkf_char c1) } else if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); - } else if (c2 == X0201) { - output_mode = JAPANESE_EUC; + } else if (c2 == JIS_X_0201) { + output_mode = EUC_JP; (*o_putc)(SSO); (*o_putc)(c1|0x80); - } else if (c2 == ISO8859_1) { - output_mode = ISO8859_1; + } else if (c2 == ISO_8859_1) { + output_mode = ISO_8859_1; (*o_putc)(c1 | 0x080); #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ - output_mode = JAPANESE_EUC; + output_mode = EUC_JP; #ifdef SHIFTJIS_CP932 if (!cp932inv_f){ nkf_char s2, s1; @@ -4054,7 +4319,7 @@ void e_oconv(nkf_char c2, nkf_char c1) set_iconv(FALSE, 0); return; /* too late to rescue this char */ } - output_mode = JAPANESE_EUC; + output_mode = EUC_JP; (*o_putc)(c2 | 0x080); (*o_putc)(c1 | 0x080); } @@ -4161,11 +4426,11 @@ void s_oconv(nkf_char c2, nkf_char c1) } else if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); - } else if (c2 == X0201) { + } else if (c2 == JIS_X_0201) { output_mode = SHIFT_JIS; (*o_putc)(c1|0x80); - } else if (c2 == ISO8859_1) { - output_mode = ISO8859_1; + } else if (c2 == ISO_8859_1) { + output_mode = ISO_8859_1; (*o_putc)(c1 | 0x080); #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ @@ -4222,7 +4487,7 @@ void j_oconv(nkf_char c2, nkf_char c1) } #endif if (c2 == EOF) { - if (output_mode !=ASCII && output_mode!=ISO8859_1) { + if (output_mode !=ASCII && output_mode!=ISO_8859_1) { (*o_putc)(ESC); (*o_putc)('('); (*o_putc)(ascii_intro); @@ -4232,41 +4497,41 @@ void j_oconv(nkf_char c2, nkf_char c1) #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ if(x0213_f){ - if(output_mode!=X0213_2){ - output_mode = X0213_2; + if(output_mode!=JIS_X_0213_2){ + output_mode = JIS_X_0213_2; (*o_putc)(ESC); (*o_putc)('$'); (*o_putc)('('); - (*o_putc)(X0213_2&0x7F); + (*o_putc)(0x50); } }else{ - if(output_mode!=X0212){ - output_mode = X0212; + if(output_mode!=JIS_X_0212){ + output_mode = JIS_X_0212; (*o_putc)(ESC); (*o_putc)('$'); (*o_putc)('('); - (*o_putc)(X0212&0x7F); + (*o_putc)(0x44); } } (*o_putc)(c2 & 0x7f); (*o_putc)(c1); #endif - } else if (c2==X0201) { - if (output_mode!=X0201) { - output_mode = X0201; + } else if (c2==JIS_X_0201) { + if (output_mode!=JIS_X_0201) { + output_mode = JIS_X_0201; (*o_putc)(ESC); (*o_putc)('('); (*o_putc)('I'); } (*o_putc)(c1); - } else if (c2==ISO8859_1) { + } else if (c2==ISO_8859_1) { /* iso8859 introduction, or 8th bit on */ /* Can we convert in 7bit form using ESC-'-'-A ? Is this popular? */ - output_mode = ISO8859_1; + output_mode = ISO_8859_1; (*o_putc)(c1|0x80); } else if (c2 == 0) { - if (output_mode !=ASCII && output_mode!=ISO8859_1) { + if (output_mode !=ASCII && output_mode!=ISO_8859_1) { (*o_putc)(ESC); (*o_putc)('('); (*o_putc)(ascii_intro); @@ -4278,15 +4543,15 @@ void j_oconv(nkf_char c2, nkf_char c1) ? c2<0x20 || 0x92fold_len+fold_margin) { /* too many kinsoku suspension */ f_line = char_size(c2,c1); fold_state = LF; /* We can't wait, do fold now */ - } else if (c2==X0201) { + } else if (c2==JIS_X_0201) { /* simple kinsoku rules return 1 means no folding */ if (c1==(0xde&0x7f)) fold_state = 1; /* ゛*/ else if (c1==(0xdf&0x7f)) fold_state = 1; /* ゜*/ @@ -4544,13 +4809,13 @@ void fold_conv(nkf_char c2, nkf_char c1) /* terminator process */ switch(fold_state) { case LF: - (*o_fconv)(0,LF); + OCONV_NEWLINE((*o_fconv)); (*o_fconv)(c2,c1); break; case 0: return; case CR: - (*o_fconv)(0,LF); + OCONV_NEWLINE((*o_fconv)); break; case TAB: case SP: @@ -4568,14 +4833,14 @@ void z_conv(nkf_char c2, nkf_char c1) /* if (c2) c1 &= 0x7f; assertion */ - if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { + if (c2 == JIS_X_0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { (*o_zconv)(c2,c1); return; } if (x0201_f) { - if (z_prev2 == X0201) { - if (c2 == X0201) { + if (z_prev2 == JIS_X_0201) { + if (c2 == JIS_X_0201) { if (c1 == (0xde&0x7f)) { /* 濁点 */ z_prev2 = 0; (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]); @@ -4589,7 +4854,7 @@ void z_conv(nkf_char c2, nkf_char c1) z_prev2 = 0; (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]); } - if (c2 == X0201) { + if (c2 == JIS_X_0201) { if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) { /* wait for 濁点 or 半濁点 */ z_prev1 = c1; @@ -4681,7 +4946,7 @@ void z_conv(nkf_char c2, nkf_char c1) break; } if (c) { - (*o_zconv)(X0201, c); + (*o_zconv)(JIS_X_0201, c); return; } } else if (c2 == 0x25) { @@ -4703,9 +4968,9 @@ void z_conv(nkf_char c2, nkf_char c1) }; if (fullwidth_to_halfwidth[c1-0x20]){ c2 = fullwidth_to_halfwidth[c1-0x20]; - (*o_zconv)(X0201, c2>>8); + (*o_zconv)(JIS_X_0201, c2>>8); if (c2 & 0xFF) { - (*o_zconv)(X0201, c2&0xFF); + (*o_zconv)(JIS_X_0201, c2&0xFF); } return; } @@ -4734,7 +4999,7 @@ void z_conv(nkf_char c2, nkf_char c1) void rot_conv(nkf_char c2, nkf_char c1) { - if (c2==0 || c2==X0201 || c2==ISO8859_1) { + if (c2==0 || c2==JIS_X_0201 || c2==ISO_8859_1) { c1 = rot13(c1); } else if (c2) { c1 = rot47(c1); @@ -4852,9 +5117,9 @@ nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = { }; static const nkf_char mime_encode[] = { - JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201, + EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201, #if defined(UTF8_INPUT_ENABLE) - UTF8, UTF8, + UTF_8, UTF_8, #endif ASCII, 0 @@ -5043,35 +5308,43 @@ void set_input_codename(char *codename) } } +static char* get_guessed_code(void) +{ + if (input_codename && !*input_codename) { + input_codename = "BINARY"; + } else { + struct input_code *p = find_inputcode_byfunc(iconv); + if (!input_codename) { + input_codename = "ASCII"; + } else if (strcmp(input_codename, "Shift_JIS") == 0) { + if (p->score & (SCORE_DEPEND|SCORE_CP932)) + input_codename = "CP932"; + } else if (strcmp(input_codename, "EUC-JP") == 0) { + if (p->score & (SCORE_X0212)) + input_codename = "EUCJP-MS"; + else if (p->score & (SCORE_DEPEND|SCORE_CP932)) + input_codename = "CP51932"; + } else if (strcmp(input_codename, "ISO-2022-JP") == 0) { + if (p->score & (SCORE_KANA)) + input_codename = "CP50221"; + else if (p->score & (SCORE_DEPEND|SCORE_CP932)) + input_codename = "CP50220"; + } + } + return input_codename; +} + #if !defined(PERL_XS) && !defined(WIN32DLL) void print_guessed_code(char *filename) { - char *codename = "BINARY"; - char *str_nlmode = NULL; if (filename != NULL) printf("%s: ", filename); if (input_codename && !*input_codename) { printf("BINARY\n"); } else { - struct input_code *p = find_inputcode_byfunc(iconv); + input_codename = get_guessed_code(); if (guess_f == 1) { - printf("%s\n", input_codename ? input_codename : "ASCII"); + printf("%s\n", input_codename); } else { - if (!input_codename) { - input_codename = "ASCII"; - } else if (strcmp(input_codename, "Shift_JIS") == 0) { - if (p->score & (SCORE_DEPEND|SCORE_CP932)) - input_codename = "CP932"; - } else if (strcmp(input_codename, "EUC-JP") == 0) { - if (p->score & (SCORE_X0212)) - input_codename = "EUCJP-MS"; - else if (p->score & (SCORE_DEPEND|SCORE_CP932)) - input_codename = "CP51932"; - } else if (strcmp(input_codename, "ISO-2022-JP") == 0) { - if (p->score & (SCORE_KANA)) - input_codename = "CP50221"; - else if (p->score & (SCORE_DEPEND|SCORE_CP932)) - input_codename = "CP50220"; - } printf("%s%s\n", input_codename, input_newline == CR ? " (CR)" : @@ -5577,7 +5850,7 @@ void open_mime(nkf_char mode) (*o_mputc)(mimeout_buf[i]); i++; } - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); (*o_mputc)(SP); base64_count = 1; if (mimeout_buf_count>0 @@ -5690,14 +5963,14 @@ void mime_prechar(nkf_char c2, nkf_char c1) if (c2 == EOF){ if (base64_count + mimeout_buf_count/3*4> 73){ (*o_base64conv)(EOF,0); - (*o_base64conv)(0,LF); + OCONV_NEWLINE((*o_base64conv)); (*o_base64conv)(0,SP); base64_count = 1; } } else { if (base64_count + mimeout_buf_count/3*4> 66) { (*o_base64conv)(EOF,0); - (*o_base64conv)(0,LF); + OCONV_NEWLINE((*o_base64conv)); (*o_base64conv)(0,SP); base64_count = 1; mimeout_mode = -1; @@ -5705,10 +5978,10 @@ void mime_prechar(nkf_char c2, nkf_char c1) } } else if (c2) { if (c2 != EOF && base64_count + mimeout_buf_count/3*4> 60) { - mimeout_mode = (output_mode==ASCII ||output_mode == ISO8859_1) ? 'Q' : 'B'; + mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B'; open_mime(output_mode); (*o_base64conv)(EOF,0); - (*o_base64conv)(0,LF); + OCONV_NEWLINE((*o_base64conv)); (*o_base64conv)(0,SP); base64_count = 1; mimeout_mode = -1; @@ -5726,14 +5999,14 @@ void mime_putc(nkf_char c) if (base64_count > 71){ if (c!=CR && c!=LF) { (*o_mputc)('='); - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); } base64_count = 0; } }else{ if (base64_count > 71){ eof_mime(); - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); base64_count = 0; } if (c == EOF) { /* c==EOF */ @@ -5786,7 +6059,7 @@ void mime_putc(nkf_char c) } if (mimeout_mode=='Q') { - if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { if (c == CR || c == LF) { close_mime(); (*o_mputc)(c); @@ -5795,7 +6068,7 @@ void mime_putc(nkf_char c) } else if (c <= SP) { close_mime(); if (base64_count > 70) { - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); base64_count = 0; } if (!nkf_isblank(c)) { @@ -5805,7 +6078,7 @@ void mime_putc(nkf_char c) } else { if (base64_count > 70) { close_mime(); - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); (*o_mputc)(SP); base64_count = 1; open_mime(output_mode); @@ -5822,7 +6095,7 @@ void mime_putc(nkf_char c) } if (mimeout_mode <= 0) { - if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { if (nkf_isspace(c)) { int flag = 0; if (mimeout_mode == -1) { @@ -5855,7 +6128,7 @@ void mime_putc(nkf_char c) if (base64_count > 1 && base64_count + mimeout_buf_count > 76 && mimeout_buf[0] != CR && mimeout_buf[0] != LF){ - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); base64_count = 0; if (!nkf_isspace(mimeout_buf[0])){ (*o_mputc)(SP); @@ -5888,7 +6161,7 @@ void mime_putc(nkf_char c) } }else{ /* mimeout_mode == 'B', 1, 2 */ - if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { if (lastchar == CR || lastchar == LF){ if (nkf_isblank(c)) { for (i=0;ihiragana, 2 hiragana->katakana, 3 both\n"); - fprintf(stderr,"v Show this usage. V: show version\n"); - fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n"); - fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n"); - fprintf(stderr,"l ISO8859-1 (Latin-1) support\n"); - fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"); - fprintf(stderr,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"); - fprintf(stderr," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"); - fprintf(stderr," 4: JISX0208 Katakana to JISX0201 Katakana\n"); - fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"); - fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"); + fprintf(HELP_OUTPUT," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"); +#endif + fprintf(HELP_OUTPUT,"t no conversion\n"); + fprintf(HELP_OUTPUT,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"); + fprintf(HELP_OUTPUT,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"); + fprintf(HELP_OUTPUT,"r {de/en}crypt ROT13/47\n"); + fprintf(HELP_OUTPUT,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"); + fprintf(HELP_OUTPUT,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n"); + fprintf(HELP_OUTPUT,"M[BQ] MIME encode [B:base64 Q:quoted]\n"); + fprintf(HELP_OUTPUT,"l ISO8859-1 (Latin-1) support\n"); + fprintf(HELP_OUTPUT,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"); + fprintf(HELP_OUTPUT,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"); + fprintf(HELP_OUTPUT," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"); + fprintf(HELP_OUTPUT," 4: JISX0208 Katakana to JISX0201 Katakana\n"); + fprintf(HELP_OUTPUT,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"); + fprintf(HELP_OUTPUT,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"); #ifdef MSDOS - fprintf(stderr,"T Text mode output\n"); -#endif - fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n"); - fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n"); - fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n"); - fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"); - fprintf(stderr,"\n"); - fprintf(stderr,"Long name options\n"); - fprintf(stderr," --ic= --oc=\n"); - fprintf(stderr," Specify the input or output codeset\n"); - fprintf(stderr," --fj --unix --mac --windows\n"); - fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n"); - fprintf(stderr," Convert for the system or code\n"); - fprintf(stderr," --hiragana --katakana --katakana-hiragana\n"); - fprintf(stderr," To Hiragana/Katakana Conversion\n"); - fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n"); + fprintf(HELP_OUTPUT,"T Text mode output\n"); +#endif + fprintf(HELP_OUTPUT,"O Output to File (DEFAULT 'nkf.out')\n"); + fprintf(HELP_OUTPUT,"I Convert non ISO-2022-JP charactor to GETA\n"); + fprintf(HELP_OUTPUT,"d,c Convert line breaks -d: LF -c: CRLF\n"); + fprintf(HELP_OUTPUT,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"); + fprintf(HELP_OUTPUT,"v, V Show this usage. V: show configuration\n"); + fprintf(HELP_OUTPUT,"\n"); + fprintf(HELP_OUTPUT,"Long name options\n"); + fprintf(HELP_OUTPUT," --ic= --oc=\n"); + fprintf(HELP_OUTPUT," Specify the input or output codeset\n"); + fprintf(HELP_OUTPUT," --fj --unix --mac --windows\n"); + fprintf(HELP_OUTPUT," --jis --euc --sjis --utf8 --utf16 --mime --base64\n"); + fprintf(HELP_OUTPUT," Convert for the system or code\n"); + fprintf(HELP_OUTPUT," --hiragana --katakana --katakana-hiragana\n"); + fprintf(HELP_OUTPUT," To Hiragana/Katakana Conversion\n"); + fprintf(HELP_OUTPUT," --prefix= Insert escape before troublesome characters of Shift_JIS\n"); #ifdef INPUT_OPTION - fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n"); + fprintf(HELP_OUTPUT," --cap-input, --url-input Convert hex after ':' or '%%'\n"); #endif #ifdef NUMCHAR_OPTION - fprintf(stderr," --numchar-input Convert Unicode Character Reference\n"); + fprintf(HELP_OUTPUT," --numchar-input Convert Unicode Character Reference\n"); #endif #ifdef UTF8_INPUT_ENABLE - fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n"); - fprintf(stderr," Specify how nkf handles unassigned characters\n"); + fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n"); + fprintf(HELP_OUTPUT," Specify how nkf handles unassigned characters\n"); #endif #ifdef OVERWRITE - fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"); - fprintf(stderr," Overwrite original listed files by filtered result\n"); - fprintf(stderr," --overwrite preserves timestamp of original files\n"); -#endif - fprintf(stderr," -g --guess Guess the input code\n"); - fprintf(stderr," --help --version Show this help/the version\n"); - fprintf(stderr," For more information, see also man nkf\n"); - fprintf(stderr,"\n"); + fprintf(HELP_OUTPUT," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"); + fprintf(HELP_OUTPUT," Overwrite original listed files by filtered result\n"); + fprintf(HELP_OUTPUT," --overwrite preserves timestamp of original files\n"); +#endif + fprintf(HELP_OUTPUT," -g --guess Guess the input code\n"); + fprintf(HELP_OUTPUT," --help --version Show this help/the version\n"); + fprintf(HELP_OUTPUT," For more information, see also man nkf\n"); + fprintf(HELP_OUTPUT,"\n"); version(); } void show_configuration(void) { - fprintf(stderr, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"); - fprintf(stderr, " Compile-time options:\n"); - fprintf(stderr, " Default encoding: " + fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"); + fprintf(HELP_OUTPUT, " Compile-time options:\n"); + fprintf(HELP_OUTPUT, " Default output encoding: " #if defined(DEFAULT_CODE_JIS) "ISO-2022-JP" #elif defined(DEFAULT_CODE_SJIS) @@ -6189,17 +6463,42 @@ void show_configuration(void) "EUC-JP" #elif defined(DEFAULT_CODE_UTF8) "UTF-8" +#endif + "\n"); + fprintf(HELP_OUTPUT, " Default output newline: " +#if DEFAULT_NEWLINE == CR + "CR" +#elif DEFAULT_NEWLINE == CRLF + "CRLF" #else - "UNKOWN" + "LF" #endif "\n"); - fprintf(stderr, " Decode MIME encoded string: %s\n", MIME_DECODE_DEFAULT ? "ON" : "OFF"); - fprintf(stderr, " Convert JIS X 0201 Katakana: %s\n", X0201_DEFAULT ? "ON" : "OFF"); - + fprintf(HELP_OUTPUT, " Decode MIME encoded string: " +#if MIME_DECODE_DEFAULT + "ON" +#else + "OFF" +#endif + "\n"); + fprintf(HELP_OUTPUT, " Convert JIS X 0201 Katakana: " +#if X0201_DEFAULT + "ON" +#else + "OFF" +#endif + "\n"); +fprintf(HELP_OUTPUT, " --help, --version output: " +#if HELP_OUTPUT_HELP_OUTPUT +"HELP_OUTPUT" +#else +"STDOUT" +#endif +"\n"); } void version(void) { - fprintf(stderr,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n"); + fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n"); } #endif /*PERL_XS*/