X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=blobdiff_plain;f=nkf.c;h=505d19290fc58126e7d98b611bbe287ab318b356;hp=54ba1a0b2cb08898b1a6a7bfea4c45c35284bf5c;hb=477df6ae857330396b089025d986f949f27bc5ac;hpb=e827747c3b546fbccae27853ba5b3d9e325cb83d;ds=sidebyside diff --git a/nkf.c b/nkf.c index 54ba1a0..505d192 100644 --- a/nkf.c +++ b/nkf.c @@ -30,15 +30,54 @@ * 現在、nkf は SorceForge にてメンテナンスが続けられています。 * http://sourceforge.jp/projects/nkf/ ***********************************************************************/ -/* $Id: nkf.c,v 1.142 2007/10/05 10:57:50 naruse Exp $ */ +/* $Id: nkf.c,v 1.159 2007/12/23 07:55:20 naruse Exp $ */ #define NKF_VERSION "2.0.8" -#define NKF_RELEASE_DATE "2007-10-05" +#define NKF_RELEASE_DATE "2007-12-22" #define COPY_RIGHT \ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \ "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon" #include "config.h" #include "utf8tbl.h" + +#if defined(DEFAULT_CODE_JIS) +#elif defined(DEFAULT_CODE_SJIS) +#elif defined(DEFAULT_CODE_EUC) +#elif defined(DEFAULT_CODE_UTF8) +#else +#define DEFAULT_CODE_JIS 1 +#endif + +#ifndef MIME_DECODE_DEFAULT +#define MIME_DECODE_DEFAULT STRICT_MIME +#endif +#ifndef X0201_DEFAULT +#define X0201_DEFAULT TRUE +#endif + +#if DEFAULT_NEWLINE == 0x0D0A +#define PUT_NEWLINE(func) do {\ + func(0x0D);\ + func(0x0A);\ +} while (0) +#define OCONV_NEWLINE(func) do {\ + func(0, 0x0D);\ + func(0, 0x0A);\ +} while (0) +#elif DEFAULT_NEWLINE == 0x0D +#define PUT_NEWLINE(func) func(0x0D) +#define OCONV_NEWLINE(func) func(0, 0x0D) +#else +#define DEFAULT_NEWLINE 0x0A +#define PUT_NEWLINE(func) func(0x0A) +#define OCONV_NEWLINE(func) func(0, 0x0A) +#endif +#ifdef HELP_OUTPUT_STDERR +#define HELP_OUTPUT stderr +#else +#define HELP_OUTPUT stdout +#endif + #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS) #define MSDOS #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__) @@ -141,42 +180,29 @@ void djgpp_setbinmode(FILE *fp) /* state of output_mode and input_mode c2 0 means ASCII - X0201 - ISO8859_1 - X0208 + JIS_X_0201 + ISO_8859_1 + JIS_X_0208 EOF all termination c1 32bit data */ -#define ASCII 0 -#define X0208 1 -#define X0201 2 -#define ISO8859_1 8 -#define NO_X0201 3 -#define X0212 0x2844 -#define X0213_1 0x284F -#define X0213_2 0x2850 - /* Input Assumption */ #define JIS_INPUT 4 #define EUC_INPUT 16 #define SJIS_INPUT 5 #define LATIN1_INPUT 6 +#define UTF8_INPUT 13 +#define UTF16_INPUT 1015 +#define UTF32_INPUT 1017 + #define FIXED_MIME 7 #define STRICT_MIME 8 /* MIME ENCODE */ -#define ISO2022JP 9 -#define JAPANESE_EUC 10 -#define SHIFT_JIS 11 - -#define UTF8 12 -#define UTF8_INPUT 13 -#define UTF16_INPUT 1015 -#define UTF32_INPUT 1017 /* byte order */ @@ -185,8 +211,6 @@ void djgpp_setbinmode(FILE *fp) #define ENDIAN_2143 2143 #define ENDIAN_3412 3412 -#define WISH_TRUE 15 - /* ASCII CODE */ #define BS 0x08 @@ -204,6 +228,180 @@ void djgpp_setbinmode(FILE *fp) #define SS3 0x8f #define CRLF 0x0D0A + +/* encodings */ + +enum nkf_encodings { + ASCII, + ISO_8859_1, + ISO_2022_JP, + CP50220, + CP50221, + CP50222, + ISO_2022_JP_1, + ISO_2022_JP_3, + SHIFT_JIS, + WINDOWS_31J, + CP10001, + EUC_JP, + CP51932, + EUCJP_MS, + EUCJP_ASCII, + SHIFT_JISX0213, + SHIFT_JIS_2004, + EUC_JISX0213, + EUC_JIS_2004, + UTF_8, + UTF_8N, + UTF_8_BOM, + UTF8_MAC, + UTF_16, + UTF_16BE, + UTF_16BE_BOM, + UTF_16LE, + UTF_16LE_BOM, + UTF_32, + UTF_32BE, + UTF_32BE_BOM, + UTF_32LE, + UTF_32LE_BOM, + JIS_X_0201=0x1000, + JIS_X_0208, + JIS_X_0212, + JIS_X_0213_1, + JIS_X_0213_2, + BINARY +}; + +nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0); +nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0); +void j_oconv(nkf_char c2, nkf_char c1); +void s_oconv(nkf_char c2, nkf_char c1); +void e_oconv(nkf_char c2, nkf_char c1); +void w_oconv(nkf_char c2, nkf_char c1); +void w_oconv16(nkf_char c2, nkf_char c1); +void w_oconv32(nkf_char c2, nkf_char c1); + +typedef struct { + char *name; + nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); + void (*oconv_func)(nkf_char c2, nkf_char c1); +} nkf_native_encoding; + +nkf_native_encoding NkfEncodingASCII = { "US_ASCII", e_iconv, e_oconv }; +nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv }; +nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv }; +nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv }; +nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv }; +nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 }; +nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; + +typedef struct { + int id; + char *name; + nkf_native_encoding *based_encoding; +} nkf_encoding; +nkf_encoding nkf_encoding_table[] = { + {ASCII, "ASCII", &NkfEncodingASCII}, + {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII}, + {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingASCII}, + {CP50220, "CP50220", &NkfEncodingISO_2022_JP}, + {CP50221, "CP50221", &NkfEncodingISO_2022_JP}, + {CP50222, "CP50222", &NkfEncodingISO_2022_JP}, + {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP}, + {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP}, + {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS}, + {WINDOWS_31J, "WINDOWS-31J", &NkfEncodingShift_JIS}, + {CP10001, "CP10001", &NkfEncodingShift_JIS}, + {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP}, + {CP51932, "CP51932", &NkfEncodingEUC_JP}, + {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP}, + {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP}, + {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS}, + {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS}, + {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP}, + {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP}, + {UTF_8, "UTF-8", &NkfEncodingUTF_8}, + {UTF_8N, "UTF-8N", &NkfEncodingUTF_8}, + {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8}, + {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8}, + {UTF_16, "UTF-16", &NkfEncodingUTF_16}, + {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16}, + {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16}, + {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16}, + {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16}, + {UTF_32, "UTF-32", &NkfEncodingUTF_32}, + {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32}, + {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32}, + {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32}, + {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32}, + {BINARY, "BINARY", &NkfEncodingASCII}, + {-1, NULL, NULL} +}; +#define NKF_ENCODING_TABLE_SIZE 34 +struct { + const char *name; + const int id; +} encoding_name_to_id_table[] = { + {"ASCII", ASCII}, + {"ISO-2022-JP", ISO_2022_JP}, + {"X-ISO2022JP-CP932", CP50220}, + {"CP50220", CP50220}, + {"CP50221", CP50221}, + {"CP50222", CP50222}, + {"ISO-2022-JP-1", ISO_2022_JP_1}, + {"ISO-2022-JP-3", ISO_2022_JP_3}, + {"SHIFT_JIS", SHIFT_JIS}, + {"SJIS", SHIFT_JIS}, + {"WINDOWS-31J", WINDOWS_31J}, + {"CSWINDOWS31J", WINDOWS_31J}, + {"CP932", WINDOWS_31J}, + {"MS932", WINDOWS_31J}, + {"CP10001", CP10001}, + {"EUCJP", EUC_JP}, + {"EUC-JP", EUC_JP}, + {"CP51932", CP51932}, + {"EUC-JP-MS", EUCJP_MS}, + {"EUCJP-MS", EUCJP_MS}, + {"EUCJPMS", EUCJP_MS}, + {"EUC-JP-ASCII", EUCJP_ASCII}, + {"EUCJP-ASCII", EUCJP_ASCII}, + {"SHIFT_JISX0213", SHIFT_JISX0213}, + {"SHIFT_JIS-2004", SHIFT_JIS_2004}, + {"EUC-JISX0213", EUC_JISX0213}, + {"EUC-JIS-2004", EUC_JIS_2004}, + {"UTF-8", UTF_8}, + {"UTF-8N", UTF_8N}, + {"UTF-8-BOM", UTF_8_BOM}, + {"UTF8-MAC", UTF8_MAC}, + {"UTF-8-MAC", UTF8_MAC}, + {"UTF-16", UTF_16}, + {"UTF-16BE", UTF_16BE}, + {"UTF-16BE-BOM", UTF_16BE_BOM}, + {"UTF-16LE", UTF_16LE}, + {"UTF-16LE-BOM", UTF_16LE_BOM}, + {"UTF-32", UTF_32}, + {"UTF-32BE", UTF_32BE}, + {"UTF-32BE-BOM", UTF_32BE_BOM}, + {"UTF-32LE", UTF_32LE}, + {"UTF-32LE-BOM", UTF_32LE_BOM}, + {"BINARY", BINARY}, + {NULL, -1} +}; +#if defined(DEFAULT_CODE_JIS) +#define DEFAULT_ENCODING ISO_2022_JP +#elif defined(DEFAULT_CODE_SJIS) +#define DEFAULT_ENCODING SHIFT_JIS +#elif defined(DEFAULT_CODE_EUC) +#define DEFAULT_ENCODING EUC_JP +#elif defined(DEFAULT_CODE_UTF8) +#define DEFAULT_ENCODING UTF_8 +#endif + + #define is_alnum(c) \ (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) @@ -223,6 +421,9 @@ void djgpp_setbinmode(FILE *fp) ('a'<=c&&c<='f') ? (c-'a'+10) : 0) #define bin2hex(c) ("0123456789ABCDEF"[c&15]) #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3) +#define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \ + ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \ + && (c != '(') && (c != ')') && (c != '.') && (c != 0x22))) #define CP932_TABLE_BEGIN 0xFA #define CP932_TABLE_END 0xFC @@ -275,10 +476,8 @@ struct input_code{ }; static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */ +static nkf_encoding *output_encoding; -#ifndef PERL_XS -static const char *CopyRight = COPY_RIGHT; -#endif #if !defined(PERL_XS) && !defined(WIN32DLL) static nkf_char noconvert(FILE *f); #endif @@ -287,9 +486,7 @@ static nkf_char kanji_convert(FILE *f); static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1); static nkf_char push_hold_buf(nkf_char c2); static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)); -static nkf_char s_iconv(nkf_char c2,nkf_char c1,nkf_char c0); static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1); -static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0); #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) /* UCS Mapping * 0: Shift_JIS, eucJP-ascii @@ -318,9 +515,6 @@ static void encode_fallback_perl(nkf_char c); static void encode_fallback_subchar(nkf_char c); static void (*encode_fallback)(nkf_char c) = NULL; static nkf_char w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1); -static nkf_char w_iconv(nkf_char c2,nkf_char c1,nkf_char c0); -static nkf_char w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0); -static nkf_char w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0); static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1); static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1); static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0); @@ -332,14 +526,8 @@ static void w_status(struct input_code *, nkf_char); static int output_bom_f = FALSE; static int output_endian = ENDIAN_BIG; static nkf_char e2w_conv(nkf_char c2,nkf_char c1); -static void w_oconv(nkf_char c2,nkf_char c1); -static void w_oconv16(nkf_char c2,nkf_char c1); -static void w_oconv32(nkf_char c2,nkf_char c1); #endif -static void e_oconv(nkf_char c2,nkf_char c1); static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1); -static void s_oconv(nkf_char c2,nkf_char c1); -static void j_oconv(nkf_char c2,nkf_char c1); static void fold_conv(nkf_char c2,nkf_char c1); static void nl_conv(nkf_char c2,nkf_char c1); static void z_conv(nkf_char c2,nkf_char c1); @@ -381,6 +569,7 @@ static void mimeout_addchar(nkf_char c); #ifndef PERL_XS static void usage(void); static void version(void); +static void show_configuration(void); #endif static void options(unsigned char *c); static void reinit(void); @@ -414,18 +603,14 @@ static int rot_f = FALSE; /* rot14/43 mode */ static int hira_f = FALSE; /* hira/kata henkan */ static int input_f = FALSE; /* non fixed input code */ static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ -static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */ +static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */ static int mime_decode_f = FALSE; /* mime decode is explicitly on */ static int mimebuf_f = FALSE; /* MIME buffered input */ static int broken_f = FALSE; /* convert ESC-less broken JIS */ static int iso8859_f = FALSE; /* ISO8859 through */ static int mimeout_f = FALSE; /* base64 mode */ -#if defined(MSDOS) || defined(__OS2__) -static int x0201_f = TRUE; /* Assume JISX0201 kana */ -#else -static int x0201_f = NO_X0201; /* Assume NO JISX0201 */ -#endif -static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */ +static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */ +static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */ #ifdef UNICODE_NORMALIZATION static int nfc_f = FALSE; @@ -478,7 +663,7 @@ static void debug(const char *str); static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0; #endif -static int guess_f = FALSE; +static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */ #if !defined PERL_XS static void print_guessed_code(char *filename); #endif @@ -529,7 +714,7 @@ struct input_code input_code_list[] = { {0} }; -static int mimeout_mode = 0; +static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */ static int base64_count = 0; /* X0208 -> ASCII converter */ @@ -704,7 +889,7 @@ static char *get_backup_filename(const char *suffix, const char *filename); #endif static int nlmode_f = 0; /* CR, LF, CRLF */ -static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */ +static int input_newline = 0; /* 0: unestablished, EOF: MIXED */ static nkf_char prev_cr = 0; /* CR or 0 */ #ifdef EASYWIN /*Easy Win */ static int end_check; @@ -714,6 +899,57 @@ static int end_check; nkf_char std_gc_buf[STD_GC_BUFSIZE]; nkf_char std_gc_ndx; +char* nkf_strcpy(const char *str) +{ + char* result = malloc(strlen(str) + 1); + if (!result){ + perror(str); + return ""; + } + strcpy(result, str); + return result; +} + +static void nkf_str_upcase(const char *src, char *dest, size_t length) +{ + int i = 0; + for (; i < length && dest[i]; i++) { + dest[i] = nkf_toupper(src[i]); + } + dest[i] = 0; +} + +static nkf_encoding *nkf_enc_from_index(int idx) +{ + if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) { + return 0; + } + return &nkf_encoding_table[idx]; +} + +static int nkf_enc_find_index(const char *name) +{ + int i, index = -1; + for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) { + if (strcmp(name, encoding_name_to_id_table[i].name) == 0) { + return encoding_name_to_id_table[i].id; + } + } + return index; +} + +static nkf_encoding *nkf_enc_find(const char *name) +{ + int idx = -1; + idx = nkf_enc_find_index(name); + if (idx < 0) return 0; + return nkf_enc_from_index(idx); +} + +#define nkf_enc_name(enc) (enc)->name +#define nkf_enc_to_index(enc) (enc)->id +#define nkf_enc_to_base_encoding(enc) (enc)->based_encoding + #ifdef WIN32DLL #include "nkf32dll.c" #elif defined(PERL_XS) @@ -743,11 +979,10 @@ int main(int argc, char **argv) #ifdef X0212_ENABLE int x0212_f_back = x0212_f; #endif -#ifdef X0212_ENABLE int x0213_f_back = x0213_f; -#endif + int guess_f_back = guess_f; reinit(); - guess_f = TRUE; + guess_f = guess_f_back; mime_f = FALSE; #ifdef CHECK_OPTION debug_f = debug_f_back; @@ -758,10 +993,8 @@ int main(int argc, char **argv) #ifdef X0212_ENABLE x0212_f = x0212_f_back; #endif -#ifdef X0213_ENABLE x0213_f = x0213_f_back; -#endif - } + } #ifdef EXEC_IO if (exec_f){ int fds[2], pid; @@ -790,8 +1023,6 @@ int main(int argc, char **argv) } #endif } - if(x0201_f == WISH_TRUE) - x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201); if (binmode_f == TRUE) #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) @@ -825,13 +1056,12 @@ int main(int argc, char **argv) int is_argument_error = FALSE; while (argc--) { input_codename = NULL; - input_nextline = 0; + input_newline = 0; #ifdef CHECK_OPTION iconv_for_check = 0; #endif if ((fin = fopen((origfname = *argv++), "r")) == NULL) { - perror(*--argv); - *argv++; + perror(*(argv-1)); is_argument_error = TRUE; continue; } else { @@ -1063,7 +1293,8 @@ static const struct { {"hiragana","h1"}, {"katakana","h2"}, {"katakana-hiragana","h3"}, - {"guess", "g"}, + {"guess=", ""}, + {"guess", "g1"}, {"cp932", ""}, {"no-cp932", ""}, #ifdef X0212_ENABLE @@ -1125,6 +1356,7 @@ void options(unsigned char *cp) unsigned char *p; unsigned char *cp_back = NULL; char codeset[32]; + nkf_encoding *enc; if (option_mode==1) return; @@ -1161,16 +1393,15 @@ void options(unsigned char *cp) cp = (unsigned char *)long_option[i].alias; }else{ if (strcmp(long_option[i].name, "ic=") == 0){ - for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){ - codeset[i] = nkf_toupper(p[i]); - } - codeset[i] = 0; - if(strcmp(codeset, "ISO-2022-JP") == 0){ + nkf_str_upcase((char *)p, codeset, 32); + enc = nkf_enc_find(codeset); + switch (nkf_enc_to_index(enc)) { + case ISO_2022_JP: input_f = JIS_INPUT; - }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 || - strcmp(codeset, "CP50220") == 0 || - strcmp(codeset, "CP50221") == 0 || - strcmp(codeset, "CP50222") == 0){ + break; + case CP50220: + case CP50221: + case CP50222: input_f = JIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1178,23 +1409,24 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){ + break; + case ISO_2022_JP_1: input_f = JIS_INPUT; #ifdef X0212_ENABLE x0212_f = TRUE; #endif - }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){ + break; + case ISO_2022_JP_3: input_f = JIS_INPUT; #ifdef X0212_ENABLE x0212_f = TRUE; #endif x0213_f = TRUE; - }else if(strcmp(codeset, "SHIFT_JIS") == 0){ + break; + case SHIFT_JIS: input_f = SJIS_INPUT; - }else if(strcmp(codeset, "WINDOWS-31J") == 0 || - strcmp(codeset, "CSWINDOWS31J") == 0 || - strcmp(codeset, "CP932") == 0 || - strcmp(codeset, "MS932") == 0){ + break; + case WINDOWS_31J: input_f = SJIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1202,7 +1434,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "CP10001") == 0){ + break; + case CP10001: input_f = SJIS_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1210,10 +1443,11 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP10001; #endif - }else if(strcmp(codeset, "EUCJP") == 0 || - strcmp(codeset, "EUC-JP") == 0){ + break; + case EUC_JP: input_f = EUC_INPUT; - }else if(strcmp(codeset, "CP51932") == 0){ + break; + case CP51932: input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = TRUE; @@ -1221,9 +1455,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "EUC-JP-MS") == 0 || - strcmp(codeset, "EUCJP-MS") == 0 || - strcmp(codeset, "EUCJPMS") == 0){ + break; + case EUCJP_MS: input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; @@ -1231,8 +1464,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_MS; #endif - }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 || - strcmp(codeset, "EUCJP-ASCII") == 0){ + break; + case EUCJP_ASCII: input_f = EUC_INPUT; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; @@ -1240,82 +1473,83 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_ASCII; #endif - }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 || - strcmp(codeset, "SHIFT_JIS-2004") == 0){ + break; + case SHIFT_JISX0213: + case SHIFT_JIS_2004: input_f = SJIS_INPUT; x0213_f = TRUE; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif - }else if(strcmp(codeset, "EUC-JISX0213") == 0 || - strcmp(codeset, "EUC-JIS-2004") == 0){ + break; + case EUC_JISX0213: + case EUC_JIS_2004: input_f = EUC_INPUT; x0213_f = TRUE; #ifdef SHIFTJIS_CP932 cp51932_f = FALSE; #endif + break; #ifdef UTF8_INPUT_ENABLE - }else if(strcmp(codeset, "UTF-8") == 0 || - strcmp(codeset, "UTF-8N") == 0 || - strcmp(codeset, "UTF-8-BOM") == 0){ + case UTF_8: + case UTF_8N: + case UTF_8_BOM: input_f = UTF8_INPUT; + break; #ifdef UNICODE_NORMALIZATION - }else if(strcmp(codeset, "UTF8-MAC") == 0 || - strcmp(codeset, "UTF-8-MAC") == 0){ + case UTF8_MAC: input_f = UTF8_INPUT; nfc_f = TRUE; + break; #endif - }else if(strcmp(codeset, "UTF-16") == 0 || - strcmp(codeset, "UTF-16BE") == 0 || - strcmp(codeset, "UTF-16BE-BOM") == 0){ + case UTF_16: + case UTF_16BE: + case UTF_16BE_BOM: input_f = UTF16_INPUT; input_endian = ENDIAN_BIG; - }else if(strcmp(codeset, "UTF-16LE") == 0 || - strcmp(codeset, "UTF-16LE-BOM") == 0){ + break; + case UTF_16LE: + case UTF_16LE_BOM: input_f = UTF16_INPUT; input_endian = ENDIAN_LITTLE; - }else if(strcmp(codeset, "UTF-32") == 0 || - strcmp(codeset, "UTF-32BE") == 0 || - strcmp(codeset, "UTF-32BE-BOM") == 0){ + break; + case UTF_32: + case UTF_32BE: + case UTF_32BE_BOM: input_f = UTF32_INPUT; input_endian = ENDIAN_BIG; - }else if(strcmp(codeset, "UTF-32LE") == 0 || - strcmp(codeset, "UTF-32LE-BOM") == 0){ + break; + case UTF_32LE: + case UTF_32LE_BOM: input_f = UTF32_INPUT; input_endian = ENDIAN_LITTLE; + break; #endif - } else { + default: fprintf(stderr, "unknown input encoding: %s\n", codeset); + break; } continue; } if (strcmp(long_option[i].name, "oc=") == 0){ x0201_f = FALSE; - for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){ - codeset[i] = nkf_toupper(p[i]); - } - codeset[i] = 0; - if(strcmp(codeset, "ISO-2022-JP") == 0){ - output_conv = j_oconv; - }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){ + nkf_str_upcase((char *)p, codeset, 32); + output_encoding = nkf_enc_find(codeset); + switch (nkf_enc_to_index(output_encoding)) { + case ISO_2022_JP: output_conv = j_oconv; - no_cp932ext_f = TRUE; -#ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; -#endif -#ifdef UTF8_OUTPUT_ENABLE - ms_ucs_map_f = UCS_MAP_CP932; -#endif - }else if(strcmp(codeset, "CP50220") == 0){ - output_conv = j_oconv; - x0201_f = TRUE; + break; + case CP50220: + output_conv = j_oconv; + x0201_f = TRUE; #ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; + cp932inv_f = FALSE; #endif #ifdef UTF8_OUTPUT_ENABLE - ms_ucs_map_f = UCS_MAP_CP932; + ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "CP50221") == 0){ + break; + case CP50221: output_conv = j_oconv; #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; @@ -1323,7 +1557,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){ + break; + case ISO_2022_JP_1: output_conv = j_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1331,7 +1566,8 @@ void options(unsigned char *cp) #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif - }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){ + break; + case ISO_2022_JP_3: output_conv = j_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1340,25 +1576,26 @@ void options(unsigned char *cp) #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif - }else if(strcmp(codeset, "SHIFT_JIS") == 0){ + break; + case SHIFT_JIS: output_conv = s_oconv; - }else if(strcmp(codeset, "WINDOWS-31J") == 0 || - strcmp(codeset, "CSWINDOWS31J") == 0 || - strcmp(codeset, "CP932") == 0 || - strcmp(codeset, "MS932") == 0){ + break; + case WINDOWS_31J: output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "CP10001") == 0){ + break; + case CP10001: output_conv = s_oconv; #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP10001; #endif - }else if(strcmp(codeset, "EUCJP") == 0 || - strcmp(codeset, "EUC-JP") == 0){ + break; + case EUC_JP: output_conv = e_oconv; - }else if(strcmp(codeset, "CP51932") == 0){ + break; + case CP51932: output_conv = e_oconv; #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; @@ -1366,9 +1603,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_CP932; #endif - }else if(strcmp(codeset, "EUC-JP-MS") == 0 || - strcmp(codeset, "EUCJP-MS") == 0 || - strcmp(codeset, "EUCJPMS") == 0){ + break; + case EUCJP_MS: output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1376,8 +1612,8 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_MS; #endif - }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 || - strcmp(codeset, "EUCJP-ASCII") == 0){ + break; + case EUCJP_ASCII: output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1385,15 +1621,17 @@ void options(unsigned char *cp) #ifdef UTF8_OUTPUT_ENABLE ms_ucs_map_f = UCS_MAP_ASCII; #endif - }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 || - strcmp(codeset, "SHIFT_JIS-2004") == 0){ - output_conv = s_oconv; - x0213_f = TRUE; + break; + case SHIFT_JISX0213: + case SHIFT_JIS_2004: + output_conv = s_oconv; + x0213_f = TRUE; #ifdef SHIFTJIS_CP932 - cp932inv_f = FALSE; + cp932inv_f = FALSE; #endif - }else if(strcmp(codeset, "EUC-JISX0213") == 0 || - strcmp(codeset, "EUC-JIS-2004") == 0){ + break; + case EUC_JISX0213: + case EUC_JIS_2004: output_conv = e_oconv; #ifdef X0212_ENABLE x0212_f = TRUE; @@ -1402,46 +1640,65 @@ void options(unsigned char *cp) #ifdef SHIFTJIS_CP932 cp932inv_f = FALSE; #endif + break; #ifdef UTF8_OUTPUT_ENABLE - }else if(strcmp(codeset, "UTF-8") == 0){ - output_conv = w_oconv; - }else if(strcmp(codeset, "UTF-8N") == 0){ + case UTF_8: + case UTF_8N: output_conv = w_oconv; - }else if(strcmp(codeset, "UTF-8-BOM") == 0){ + break; + case UTF_8_BOM: output_conv = w_oconv; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-16BE") == 0){ + break; + case UTF_16BE: output_conv = w_oconv16; - }else if(strcmp(codeset, "UTF-16") == 0 || - strcmp(codeset, "UTF-16BE-BOM") == 0){ + break; + case UTF_16: + case UTF_16BE_BOM: output_conv = w_oconv16; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-16LE") == 0){ + break; + case UTF_16LE: output_conv = w_oconv16; output_endian = ENDIAN_LITTLE; - }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){ + break; + case UTF_16LE_BOM: output_conv = w_oconv16; output_endian = ENDIAN_LITTLE; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-32") == 0 || - strcmp(codeset, "UTF-32BE") == 0){ + break; + case UTF_32: + case UTF_32BE: output_conv = w_oconv32; - }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){ + break; + case UTF_32BE_BOM: output_conv = w_oconv32; output_bom_f = TRUE; - }else if(strcmp(codeset, "UTF-32LE") == 0){ + break; + case UTF_32LE: output_conv = w_oconv32; output_endian = ENDIAN_LITTLE; - }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){ + break; + case UTF_32LE_BOM: output_conv = w_oconv32; output_endian = ENDIAN_LITTLE; output_bom_f = TRUE; + break; #endif - } else { + default: fprintf(stderr, "unknown output encoding: %s\n", codeset); + break; } continue; } + if (strcmp(long_option[i].name, "guess=") == 0){ + if (p[0] == '1') { + guess_f = 2; + } else { + guess_f = 1; + } + continue; + } #ifdef OVERWRITE if (strcmp(long_option[i].name, "overwrite") == 0){ file_out_f = TRUE; @@ -1637,7 +1894,7 @@ void options(unsigned char *cp) if (*cp=='1') { /* alias of -t */ nop_f = TRUE; - *cp++; + *cp += 1; } else if (*cp=='2') { /* * -t with put/get @@ -1646,20 +1903,23 @@ void options(unsigned char *cp) * */ nop_f = 2; - *cp++; + *cp += 1; } else nop_f = TRUE; continue; case 'j': /* JIS output */ case 'n': output_conv = j_oconv; + output_encoding = nkf_enc_from_index(ISO_2022_JP); continue; case 'e': /* AT&T EUC output */ output_conv = e_oconv; cp932inv_f = FALSE; + output_encoding = nkf_enc_from_index(EUC_JP); continue; case 's': /* SJIS output */ output_conv = s_oconv; + output_encoding = nkf_enc_from_index(SHIFT_JIS); continue; case 'l': /* ISO8859 Latin-1 support, no conversion */ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ @@ -1693,7 +1953,7 @@ void options(unsigned char *cp) #endif #ifndef PERL_XS case 'V': - version(); + show_configuration(); exit(1); break; case 'v': @@ -1707,16 +1967,22 @@ void options(unsigned char *cp) output_conv = w_oconv; cp++; if (cp[0] == '0'){ cp++; + output_encoding = nkf_enc_from_index(UTF_8N); } else { output_bom_f = TRUE; + output_encoding = nkf_enc_from_index(UTF_8_BOM); } } else { + int enc_idx; if ('1'== cp[0] && '6'==cp[1]) { output_conv = w_oconv16; cp+=2; + enc_idx = UTF_16; } else if ('3'== cp[0] && '2'==cp[1]) { output_conv = w_oconv32; cp+=2; + enc_idx = UTF_32; } else { output_conv = w_oconv; + output_encoding = nkf_enc_from_index(UTF_8); continue; } if (cp[0]=='L') { @@ -1725,13 +1991,21 @@ void options(unsigned char *cp) } else if (cp[0] == 'B') { cp++; } else { + output_encoding = nkf_enc_from_index(enc_idx); continue; } if (cp[0] == '0'){ cp++; + enc_idx = enc_idx == UTF_16 + ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE) + : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE); } else { output_bom_f = TRUE; + enc_idx = enc_idx == UTF_16 + ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM) + : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM); } + output_encoding = nkf_enc_from_index(enc_idx); } continue; #endif @@ -1771,7 +2045,6 @@ void options(unsigned char *cp) continue; case 'S': /* MS Kanji input */ input_f = SJIS_INPUT; - if (x0201_f==NO_X0201) x0201_f=TRUE; continue; case 'Z': /* Convert X0208 alphabet to asii */ /* alpha_f @@ -1799,8 +2072,7 @@ void options(unsigned char *cp) 0xa0-0xd in MS Kanji (0xa0-0xdf) */ continue; - case 'X': /* Assume X0201 kana */ - /* Default value is NO_X0201 for EUC/MS-Kanji mix */ + case 'X': /* Convert X0201 kana to X0208 */ x0201_f = TRUE; continue; case 'F': /* prserve new lines */ @@ -1869,9 +2141,9 @@ void options(unsigned char *cp) case 'd':/* delete cr code */ nlmode_f = LF; continue; - case 'I': /* ISO-2022-JP output */ - iso2022jp_f = TRUE; - continue; + case 'I': /* ISO-2022-JP output */ + iso2022jp_f = TRUE; + continue; case 'L': /* line mode */ if (*cp=='u') { /* unix */ nlmode_f = LF; cp++; @@ -1885,7 +2157,15 @@ void options(unsigned char *cp) continue; #ifndef PERL_XS case 'g': - guess_f = TRUE; + if (*cp == '1') { + guess_f = 2; + cp++; + } else if (*cp == '0') { + guess_f = 1; + cp++; + } else { + guess_f = 1; + } continue; #endif case SP: @@ -1946,7 +2226,8 @@ void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_ch #define SCORE_KANA (SCORE_L2 << 1) /* いわゆる半角カナ */ #define SCORE_DEPEND (SCORE_KANA << 1) /* 機種依存文字 */ #define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932 による読み換え (IBM extended characters) */ -#define SCORE_NO_EXIST (SCORE_CP932 << 1) /* 存在しない文字 */ +#define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */ +#define SCORE_NO_EXIST (SCORE_X0212 << 1) /* 存在しない文字 */ #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME による指定 */ #define SCORE_ERROR (SCORE_iMIME << 1) /* エラー */ @@ -1990,6 +2271,8 @@ void code_score(struct input_code *ptr) set_code_score(ptr, SCORE_ERROR); }else if (c2 == SSO){ set_code_score(ptr, SCORE_KANA); + }else if (c2 == 0x8f){ + set_code_score(ptr, SCORE_X0212); #ifdef UTF8_OUTPUT_ENABLE }else if (!e2w_conv(c2, c1)){ set_code_score(ptr, SCORE_NO_EXIST); @@ -2059,9 +2342,12 @@ void s_status(struct input_code *ptr, nkf_char c) status_push_ch(ptr, c); code_score(ptr); status_clear(ptr); - }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){ + }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){ ptr->stat = 1; status_push_ch(ptr, c); + }else if (0xed <= c && c <= 0xee){ + ptr->stat = 3; + status_push_ch(ptr, c); #ifdef SHIFTJIS_CP932 }else if (is_ibmext_in_sjis(c)){ ptr->stat = 2; @@ -2091,7 +2377,7 @@ void s_status(struct input_code *ptr, nkf_char c) if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) { status_push_ch(ptr, c); if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) { - code_score(ptr); + set_code_score(ptr, SCORE_CP932); status_clear(ptr); break; } @@ -2099,6 +2385,16 @@ void s_status(struct input_code *ptr, nkf_char c) #endif /* SHIFTJIS_CP932 */ status_disable(ptr); break; + case 3: + if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ + status_push_ch(ptr, c); + s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); + set_code_score(ptr, SCORE_CP932); + status_clear(ptr); + }else{ + status_disable(ptr); + } + break; } } @@ -2629,16 +2925,16 @@ nkf_char kanji_convert(FILE *f) NEXT; } else { /* estab_f==TRUE */ if (iso8859_f) { - c2 = ISO8859_1; + c2 = ISO_8859_1; c1 &= 0x7f; SEND; } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) { /* SJIS X0201 Case... */ - if(iso2022jp_f && x0201_f==NO_X0201) { + if (iso2022jp_f && !x0201_f) { (*oconv)(GETA1, GETA2); NEXT; } else { - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; SEND; } @@ -2647,11 +2943,11 @@ nkf_char kanji_convert(FILE *f) c1 = (*i_getc)(f); /* skip SSO */ code_status(c1); if (SSP<=c1 && c1<0xe0) { - if(iso2022jp_f && x0201_f==NO_X0201) { + if (iso2022jp_f && !x0201_f) { (*oconv)(GETA1, GETA2); NEXT; } else { - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; SEND; } @@ -2661,7 +2957,7 @@ nkf_char kanji_convert(FILE *f) } else if (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE)) { /* CP10001 */ - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; SEND; } else { @@ -2675,23 +2971,23 @@ nkf_char kanji_convert(FILE *f) if (shift_mode) { /* output 1 shifted byte */ if (iso8859_f) { - c2 = ISO8859_1; + c2 = ISO_8859_1; SEND; } else if (SP <= c1 && c1 < (0xe0&0x7f)){ /* output 1 shifted byte */ - if(iso2022jp_f && x0201_f==NO_X0201) { + if (iso2022jp_f && !x0201_f) { (*oconv)(GETA1, GETA2); NEXT; } else { - c2 = X0201; + c2 = JIS_X_0201; SEND; } } else { /* look like bogus code */ NEXT; } - } else if (input_mode == X0208 || input_mode == X0212 || - input_mode == X0213_1 || input_mode == X0213_2) { + } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 || + input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) { /* in case of Kanji shifted */ c2 = c1; NEXT; @@ -2739,7 +3035,7 @@ nkf_char kanji_convert(FILE *f) LAST; } else if (c1 == '@'|| c1 == 'B') { /* This is kanji introduction */ - input_mode = X0208; + input_mode = JIS_X_0208; shift_mode = FALSE; set_input_codename("ISO-2022-JP"); #ifdef CHECK_OPTION @@ -2756,21 +3052,21 @@ nkf_char kanji_convert(FILE *f) LAST; } else if (c1 == '@'|| c1 == 'B') { /* This is kanji introduction */ - input_mode = X0208; + input_mode = JIS_X_0208; shift_mode = FALSE; NEXT; #ifdef X0212_ENABLE } else if (c1 == 'D'){ - input_mode = X0212; + input_mode = JIS_X_0212; shift_mode = FALSE; NEXT; #endif /* X0212_ENABLE */ - } else if (c1 == (X0213_1&0x7F)){ - input_mode = X0213_1; + } else if (c1 == 0x4F){ + input_mode = JIS_X_0213_1; shift_mode = FALSE; NEXT; - } else if (c1 == (X0213_2&0x7F)){ - input_mode = X0213_2; + } else if (c1 == 0x50){ + input_mode = JIS_X_0213_2; shift_mode = FALSE; NEXT; } else { @@ -2783,7 +3079,7 @@ nkf_char kanji_convert(FILE *f) } } else if (broken_f&0x2) { /* accept any ESC-(-x as broken code ... */ - input_mode = X0208; + input_mode = JIS_X_0208; shift_mode = FALSE; NEXT; } else { @@ -2801,7 +3097,7 @@ nkf_char kanji_convert(FILE *f) } else { if (c1 == 'I') { /* This is X0201 kana introduction */ - input_mode = X0201; shift_mode = X0201; + input_mode = JIS_X_0201; shift_mode = JIS_X_0201; NEXT; } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') { /* This is X0208 kanji introduction */ @@ -2822,7 +3118,7 @@ nkf_char kanji_convert(FILE *f) c3 = (*i_getc)(f); /* skip SS2 */ if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){ c1 = c3; - c2 = X0201; + c2 = JIS_X_0201; SEND; }else{ (*i_ungetc)(c3, f); @@ -2906,7 +3202,7 @@ nkf_char kanji_convert(FILE *f) SEND; } } - } else if (c1 == DEL && input_mode == X0208) { + } else if (c1 == DEL && input_mode == JIS_X_0208) { /* CP5022x */ c2 = c1; NEXT; @@ -2937,8 +3233,8 @@ nkf_char kanji_convert(FILE *f) break; } break; - case X0208: - case X0213_1: + case JIS_X_0208: + case JIS_X_0213_1: if (ms_ucs_map_f && 0x7F <= c2 && c2 <= 0x92 && 0x21 <= c1 && c1 <= 0x7E) { @@ -2950,11 +3246,11 @@ nkf_char kanji_convert(FILE *f) (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ break; #ifdef X0212_ENABLE - case X0212: + case JIS_X_0212: (*oconv)(PREFIX_EUCG3 | c2, c1); break; #endif /* X0212_ENABLE */ - case X0213_2: + case JIS_X_0213_2: (*oconv)(PREFIX_EUCG3 | c2, c1); break; default: @@ -3050,7 +3346,7 @@ h_conv(FILE *f, nkf_char c2, nkf_char c1) (*iconv)(0, c2, 0); continue; }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){ - (*iconv)(X0201, c2, 0); + (*iconv)(JIS_X_0201, c2, 0); continue; } if (hold_index < hold_count){ @@ -3181,7 +3477,7 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0) { - if (c2 == X0201) { + if (c2 == JIS_X_0201) { c1 &= 0x7f; } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) { /* NOP */ @@ -3200,7 +3496,7 @@ nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0) nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) { - if (c2 == X0201) { + if (c2 == JIS_X_0201) { c1 &= 0x7f; #ifdef X0212_ENABLE }else if (c2 == 0x8f){ @@ -3229,7 +3525,7 @@ nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) } #endif /* X0212_ENABLE */ } else if (c2 == SSO){ - c2 = X0201; + c2 = JIS_X_0201; c1 &= 0x7f; } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) { /* NOP */ @@ -3647,7 +3943,7 @@ nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *p c2 &= 0x7f; c2 |= PREFIX_EUCG3; } - if (c2 == SO) c2 = X0201; + if (c2 == SO) c2 = JIS_X_0201; c1 = val & 0x7f; if (p2) *p2 = c2; if (p1) *p1 = c1; @@ -3747,7 +4043,7 @@ nkf_char e2w_conv(nkf_char c2, nkf_char c1) { const unsigned short *p; - if (c2 == X0201) { + if (c2 == JIS_X_0201) { if (ms_ucs_map_f == UCS_MAP_CP10001) { switch (c1) { case 0x20: @@ -3828,11 +4124,11 @@ void w_oconv(nkf_char c2, nkf_char c1) if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); - } else if (c2 == ISO8859_1) { - output_mode = ISO8859_1; + } else if (c2 == ISO_8859_1) { + output_mode = UTF_8; (*o_putc)(c1 | 0x080); } else { - output_mode = UTF8; + output_mode = UTF_8; val = e2w_conv(c2, c1); if (val){ w16w_conv(val, &c2, &c1, &c0); @@ -3863,7 +4159,7 @@ void w_oconv16(nkf_char c2, nkf_char c1) return; } - if (c2 == ISO8859_1) { + if (c2 == ISO_8859_1) { c2 = 0; c1 |= 0x80; #ifdef NUMCHAR_OPTION @@ -3928,7 +4224,7 @@ void w_oconv32(nkf_char c2, nkf_char c1) return; } - if (c2 == ISO8859_1) { + if (c2 == ISO_8859_1) { c1 |= 0x80; #ifdef NUMCHAR_OPTION } else if (c2 == 0 && is_unicode_capsule(c1)) { @@ -3987,15 +4283,15 @@ void e_oconv(nkf_char c2, nkf_char c1) } else if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); - } else if (c2 == X0201) { - output_mode = JAPANESE_EUC; + } else if (c2 == JIS_X_0201) { + output_mode = EUC_JP; (*o_putc)(SSO); (*o_putc)(c1|0x80); - } else if (c2 == ISO8859_1) { - output_mode = ISO8859_1; + } else if (c2 == ISO_8859_1) { + output_mode = ISO_8859_1; (*o_putc)(c1 | 0x080); #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ - output_mode = JAPANESE_EUC; + output_mode = EUC_JP; #ifdef SHIFTJIS_CP932 if (!cp932inv_f){ nkf_char s2, s1; @@ -4023,7 +4319,7 @@ void e_oconv(nkf_char c2, nkf_char c1) set_iconv(FALSE, 0); return; /* too late to rescue this char */ } - output_mode = JAPANESE_EUC; + output_mode = EUC_JP; (*o_putc)(c2 | 0x080); (*o_putc)(c1 | 0x080); } @@ -4130,11 +4426,11 @@ void s_oconv(nkf_char c2, nkf_char c1) } else if (c2 == 0) { output_mode = ASCII; (*o_putc)(c1); - } else if (c2 == X0201) { + } else if (c2 == JIS_X_0201) { output_mode = SHIFT_JIS; (*o_putc)(c1|0x80); - } else if (c2 == ISO8859_1) { - output_mode = ISO8859_1; + } else if (c2 == ISO_8859_1) { + output_mode = ISO_8859_1; (*o_putc)(c1 | 0x080); #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ @@ -4191,7 +4487,7 @@ void j_oconv(nkf_char c2, nkf_char c1) } #endif if (c2 == EOF) { - if (output_mode !=ASCII && output_mode!=ISO8859_1) { + if (output_mode !=ASCII && output_mode!=ISO_8859_1) { (*o_putc)(ESC); (*o_putc)('('); (*o_putc)(ascii_intro); @@ -4201,41 +4497,41 @@ void j_oconv(nkf_char c2, nkf_char c1) #ifdef X0212_ENABLE } else if (is_eucg3(c2)){ if(x0213_f){ - if(output_mode!=X0213_2){ - output_mode = X0213_2; + if(output_mode!=JIS_X_0213_2){ + output_mode = JIS_X_0213_2; (*o_putc)(ESC); (*o_putc)('$'); (*o_putc)('('); - (*o_putc)(X0213_2&0x7F); + (*o_putc)(0x50); } }else{ - if(output_mode!=X0212){ - output_mode = X0212; + if(output_mode!=JIS_X_0212){ + output_mode = JIS_X_0212; (*o_putc)(ESC); (*o_putc)('$'); (*o_putc)('('); - (*o_putc)(X0212&0x7F); + (*o_putc)(0x44); } } (*o_putc)(c2 & 0x7f); (*o_putc)(c1); #endif - } else if (c2==X0201) { - if (output_mode!=X0201) { - output_mode = X0201; + } else if (c2==JIS_X_0201) { + if (output_mode!=JIS_X_0201) { + output_mode = JIS_X_0201; (*o_putc)(ESC); (*o_putc)('('); (*o_putc)('I'); } (*o_putc)(c1); - } else if (c2==ISO8859_1) { + } else if (c2==ISO_8859_1) { /* iso8859 introduction, or 8th bit on */ /* Can we convert in 7bit form using ESC-'-'-A ? Is this popular? */ - output_mode = ISO8859_1; + output_mode = ISO_8859_1; (*o_putc)(c1|0x80); } else if (c2 == 0) { - if (output_mode !=ASCII && output_mode!=ISO8859_1) { + if (output_mode !=ASCII && output_mode!=ISO_8859_1) { (*o_putc)(ESC); (*o_putc)('('); (*o_putc)(ascii_intro); @@ -4247,15 +4543,15 @@ void j_oconv(nkf_char c2, nkf_char c1) ? c2<0x20 || 0x92fold_len+fold_margin) { /* too many kinsoku suspension */ f_line = char_size(c2,c1); fold_state = LF; /* We can't wait, do fold now */ - } else if (c2==X0201) { + } else if (c2==JIS_X_0201) { /* simple kinsoku rules return 1 means no folding */ if (c1==(0xde&0x7f)) fold_state = 1; /* ゛*/ else if (c1==(0xdf&0x7f)) fold_state = 1; /* ゜*/ @@ -4513,13 +4809,13 @@ void fold_conv(nkf_char c2, nkf_char c1) /* terminator process */ switch(fold_state) { case LF: - (*o_fconv)(0,LF); + OCONV_NEWLINE((*o_fconv)); (*o_fconv)(c2,c1); break; case 0: return; case CR: - (*o_fconv)(0,LF); + OCONV_NEWLINE((*o_fconv)); break; case TAB: case SP: @@ -4537,14 +4833,14 @@ void z_conv(nkf_char c2, nkf_char c1) /* if (c2) c1 &= 0x7f; assertion */ - if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { + if (c2 == JIS_X_0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { (*o_zconv)(c2,c1); return; } if (x0201_f) { - if (z_prev2 == X0201) { - if (c2 == X0201) { + if (z_prev2 == JIS_X_0201) { + if (c2 == JIS_X_0201) { if (c1 == (0xde&0x7f)) { /* 濁点 */ z_prev2 = 0; (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]); @@ -4558,7 +4854,7 @@ void z_conv(nkf_char c2, nkf_char c1) z_prev2 = 0; (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]); } - if (c2 == X0201) { + if (c2 == JIS_X_0201) { if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) { /* wait for 濁点 or 半濁点 */ z_prev1 = c1; @@ -4650,7 +4946,7 @@ void z_conv(nkf_char c2, nkf_char c1) break; } if (c) { - (*o_zconv)(X0201, c); + (*o_zconv)(JIS_X_0201, c); return; } } else if (c2 == 0x25) { @@ -4672,9 +4968,9 @@ void z_conv(nkf_char c2, nkf_char c1) }; if (fullwidth_to_halfwidth[c1-0x20]){ c2 = fullwidth_to_halfwidth[c1-0x20]; - (*o_zconv)(X0201, c2>>8); + (*o_zconv)(JIS_X_0201, c2>>8); if (c2 & 0xFF) { - (*o_zconv)(X0201, c2&0xFF); + (*o_zconv)(JIS_X_0201, c2&0xFF); } return; } @@ -4703,7 +4999,7 @@ void z_conv(nkf_char c2, nkf_char c1) void rot_conv(nkf_char c2, nkf_char c1) { - if (c2==0 || c2==X0201 || c2==ISO8859_1) { + if (c2==0 || c2==JIS_X_0201 || c2==ISO_8859_1) { c1 = rot13(c1); } else if (c2) { c1 = rot47(c1); @@ -4821,9 +5117,9 @@ nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = { }; static const nkf_char mime_encode[] = { - JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201, + EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201, #if defined(UTF8_INPUT_ENABLE) - UTF8, UTF8, + UTF_8, UTF_8, #endif ASCII, 0 @@ -5012,24 +5308,51 @@ void set_input_codename(char *codename) } } +static char* get_guessed_code(void) +{ + if (input_codename && !*input_codename) { + input_codename = "BINARY"; + } else { + struct input_code *p = find_inputcode_byfunc(iconv); + if (!input_codename) { + input_codename = "ASCII"; + } else if (strcmp(input_codename, "Shift_JIS") == 0) { + if (p->score & (SCORE_DEPEND|SCORE_CP932)) + input_codename = "CP932"; + } else if (strcmp(input_codename, "EUC-JP") == 0) { + if (p->score & (SCORE_X0212)) + input_codename = "EUCJP-MS"; + else if (p->score & (SCORE_DEPEND|SCORE_CP932)) + input_codename = "CP51932"; + } else if (strcmp(input_codename, "ISO-2022-JP") == 0) { + if (p->score & (SCORE_KANA)) + input_codename = "CP50221"; + else if (p->score & (SCORE_DEPEND|SCORE_CP932)) + input_codename = "CP50220"; + } + } + return input_codename; +} + #if !defined(PERL_XS) && !defined(WIN32DLL) void print_guessed_code(char *filename) { - char *codename = "BINARY"; - char *str_nlmode = NULL; if (filename != NULL) printf("%s: ", filename); if (input_codename && !*input_codename) { printf("BINARY\n"); } else { - struct input_code *p = find_inputcode_byfunc(iconv); - printf("%s%s%s\n", - (input_codename ? input_codename : "ASCII"), - ((p->score & (SCORE_DEPEND|SCORE_CP932|SCORE_NO_EXIST)) ? "+" : ""), - input_nextline == CR ? " (CR)" : - input_nextline == LF ? " (LF)" : - input_nextline == CRLF ? " (CRLF)" : - input_nextline == EOF ? " (MIXED NL)" : - ""); + input_codename = get_guessed_code(); + if (guess_f == 1) { + printf("%s\n", input_codename); + } else { + printf("%s%s\n", + input_codename, + input_newline == CR ? " (CR)" : + input_newline == LF ? " (LF)" : + input_newline == CRLF ? " (CRLF)" : + input_newline == EOF ? " (MIXED NL)" : + ""); + } } } #endif /*WIN32DLL*/ @@ -5485,13 +5808,15 @@ nkf_char base64decode(nkf_char c) if (c > '@') { if (c < '[') { i = c - 'A'; /* A..Z 0-25 */ + } else if (c == '_') { + i = '?' /* 63 */ ; /* _ 63 */ } else { i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */ } } else if (c > '/') { i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */ - } else if (c == '+') { - i = '>' /* 62 */ ; /* + 62 */ + } else if (c == '+' || c == '-') { + i = '>' /* 62 */ ; /* + and - 62 */ } else { i = '?' /* 63 */ ; /* / 63 */ } @@ -5505,8 +5830,6 @@ static nkf_char b64c; #define MIMEOUT_BUF_LENGTH (60) char mimeout_buf[MIMEOUT_BUF_LENGTH+1]; int mimeout_buf_count = 0; -int mimeout_preserve_space = 0; -#define itoh4(c) (c>=10?c+'A'-10:c+'0') void open_mime(nkf_char mode) { @@ -5521,35 +5844,30 @@ void open_mime(nkf_char mode) } } mimeout_mode = mime_encode_method[i]; - i = 0; if (base64_count>45) { if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){ (*o_mputc)(mimeout_buf[i]); i++; } - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); (*o_mputc)(SP); base64_count = 1; - if (!mimeout_preserve_space && mimeout_buf_count>0 + if (mimeout_buf_count>0 && (mimeout_buf[i]==SP || mimeout_buf[i]==TAB || mimeout_buf[i]==CR || mimeout_buf[i]==LF)) { i++; } } - if (!mimeout_preserve_space) { - for (;i 0) { if (mimeout_f!=FIXED_MIME) { close_mime(); } else if (mimeout_mode != 'Q') @@ -5604,8 +5922,8 @@ void mimeout_addchar(nkf_char c) base64_count = 0; } else if(!nkf_isalnum(c)) { (*o_mputc)('='); - (*o_mputc)(itoh4(((c>>4)&0xf))); - (*o_mputc)(itoh4((c&0xf))); + (*o_mputc)(bin2hex(((c>>4)&0xf))); + (*o_mputc)(bin2hex((c&0xf))); base64_count += 3; } else { (*o_mputc)(c); @@ -5641,32 +5959,34 @@ void mimeout_addchar(nkf_char c) void mime_prechar(nkf_char c2, nkf_char c1) { - if (mimeout_mode){ + if (mimeout_mode > 0){ if (c2 == EOF){ if (base64_count + mimeout_buf_count/3*4> 73){ (*o_base64conv)(EOF,0); - (*o_base64conv)(0,LF); + OCONV_NEWLINE((*o_base64conv)); (*o_base64conv)(0,SP); + base64_count = 1; } - } else if (c2){ - if (base64_count + mimeout_buf_count/3*4> 66){ + } else { + if (base64_count + mimeout_buf_count/3*4> 66) { (*o_base64conv)(EOF,0); - (*o_base64conv)(0,LF); - (*o_base64conv)(0,SP); - } - }/*else if (mime_lastchar2){ - if (c1 <=DEL && !nkf_isspace(c1)){ + OCONV_NEWLINE((*o_base64conv)); (*o_base64conv)(0,SP); + base64_count = 1; + mimeout_mode = -1; } - }*/ - }/*else{ - if (c2 && mime_lastchar2 == 0 - && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){ - (*o_base64conv)(0,SP); } - }*/ - /*mime_lastchar2 = c2; - mime_lastchar1 = c1;*/ + } else if (c2) { + if (c2 != EOF && base64_count + mimeout_buf_count/3*4> 60) { + mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B'; + open_mime(output_mode); + (*o_base64conv)(EOF,0); + OCONV_NEWLINE((*o_base64conv)); + (*o_base64conv)(0,SP); + base64_count = 1; + mimeout_mode = -1; + } + } } void mime_putc(nkf_char c) @@ -5679,14 +5999,14 @@ void mime_putc(nkf_char c) if (base64_count > 71){ if (c!=CR && c!=LF) { (*o_mputc)('='); - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); } base64_count = 0; } }else{ if (base64_count > 71){ eof_mime(); - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); base64_count = 0; } if (c == EOF) { /* c==EOF */ @@ -5702,10 +6022,11 @@ void mime_putc(nkf_char c) /* mimeout_f != FIXED_MIME */ if (c == EOF) { /* c==EOF */ + if (mimeout_mode == -1 && mimeout_buf_count > 1) open_mime(output_mode); j = mimeout_buf_count; mimeout_buf_count = 0; i = 0; - if (mimeout_mode) { + if (mimeout_mode > 0) { if (!nkf_isblank(mimeout_buf[j-1])) { for (;i 0){ + lastchar = mimeout_buf[mimeout_buf_count - 1]; + }else{ + lastchar = -1; + } + if (mimeout_mode=='Q') { - if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { if (c == CR || c == LF) { close_mime(); (*o_mputc)(c); @@ -5741,31 +6068,46 @@ void mime_putc(nkf_char c) } else if (c <= SP) { close_mime(); if (base64_count > 70) { - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); base64_count = 0; } if (!nkf_isblank(c)) { (*o_mputc)(SP); base64_count++; } - } - (*o_mputc)(c); - base64_count++; + } else { + if (base64_count > 70) { + close_mime(); + PUT_NEWLINE((*o_mputc)); + (*o_mputc)(SP); + base64_count = 1; + open_mime(output_mode); + } + if (!nkf_noescape_mime(c)) { + mimeout_addchar(c); + return; + } + } + (*o_mputc)(c); + base64_count++; } return; } - if (mimeout_buf_count > 0){ - lastchar = mimeout_buf[mimeout_buf_count - 1]; - }else{ - lastchar = -1; - } - - if (!mimeout_mode) { - if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if (mimeout_mode <= 0) { + if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { if (nkf_isspace(c)) { + int flag = 0; + if (mimeout_mode == -1) { + flag = 1; + } if (c==CR || c==LF) { - base64_count=0; + if (flag) { + open_mime(output_mode); + output_mode = 0; + } else { + base64_count = 0; + } } for (i=0;i 1 && base64_count + mimeout_buf_count > 76 && mimeout_buf[0] != CR && mimeout_buf[0] != LF){ - (*o_mputc)(LF); + PUT_NEWLINE((*o_mputc)); base64_count = 0; if (!nkf_isspace(mimeout_buf[0])){ (*o_mputc)(SP); @@ -5814,7 +6161,7 @@ void mime_putc(nkf_char c) } }else{ /* mimeout_mode == 'B', 1, 2 */ - if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { + if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { if (lastchar == CR || lastchar == LF){ if (nkf_isblank(c)) { for (i=0;i0 && SPMIMEOUT_BUF_LENGTH) { @@ -5902,17 +6251,13 @@ void reinit(void) hira_f = FALSE; input_f = FALSE; alpha_f = FALSE; - mime_f = STRICT_MIME; + mime_f = MIME_DECODE_DEFAULT; mime_decode_f = FALSE; mimebuf_f = FALSE; broken_f = FALSE; iso8859_f = FALSE; mimeout_f = FALSE; -#if defined(MSDOS) || defined(__OS2__) - x0201_f = TRUE; -#else - x0201_f = NO_X0201; -#endif + x0201_f = X0201_DEFAULT; iso2022jp_f = FALSE; #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) ms_ucs_map_f = UCS_MAP_ASCII; @@ -5940,7 +6285,7 @@ void reinit(void) noout_f = FALSE; debug_f = FALSE; #endif - guess_f = FALSE; + guess_f = 0; #ifdef EXEC_IO exec_f = 0; #endif @@ -5995,7 +6340,7 @@ void reinit(void) mime_decode_mode = FALSE; file_out_f = FALSE; nlmode_f = 0; - input_nextline = 0; + input_newline = 0; prev_cr = 0; option_mode = 0; broken_counter = 0; @@ -6005,6 +6350,7 @@ void reinit(void) iconv_for_check = 0; #endif input_codename = NULL; + output_encoding = nkf_enc_from_index(DEFAULT_ENCODING); #ifdef WIN32DLL reinitdll(); #endif /*WIN32DLL*/ @@ -6028,98 +6374,131 @@ nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0) #endif void usage(void) { - fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"); - fprintf(stderr,"Flags:\n"); - fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n"); + fprintf(HELP_OUTPUT,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"); + fprintf(HELP_OUTPUT,"Flags:\n"); + fprintf(HELP_OUTPUT,"b,u Output is buffered (DEFAULT),Output is unbuffered\n"); #ifdef DEFAULT_CODE_SJIS - fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n"); + fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n"); #endif #ifdef DEFAULT_CODE_JIS - fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n"); + fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n"); #endif #ifdef DEFAULT_CODE_EUC - fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n"); + fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n"); #endif #ifdef DEFAULT_CODE_UTF8 - fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n"); + fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n"); #endif #ifdef UTF8_OUTPUT_ENABLE - fprintf(stderr," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"); + fprintf(HELP_OUTPUT," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"); #endif - fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"); + fprintf(HELP_OUTPUT,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"); #ifdef UTF8_INPUT_ENABLE - fprintf(stderr," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"); -#endif - fprintf(stderr,"t no conversion\n"); - fprintf(stderr,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"); - fprintf(stderr,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"); - fprintf(stderr,"r {de/en}crypt ROT13/47\n"); - fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"); - fprintf(stderr,"v Show this usage. V: show version\n"); - fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n"); - fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n"); - fprintf(stderr,"l ISO8859-1 (Latin-1) support\n"); - fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"); - fprintf(stderr,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"); - fprintf(stderr," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"); - fprintf(stderr," 4: JISX0208 Katakana to JISX0201 Katakana\n"); - fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"); - fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"); + fprintf(HELP_OUTPUT," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"); +#endif + fprintf(HELP_OUTPUT,"t no conversion\n"); + fprintf(HELP_OUTPUT,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"); + fprintf(HELP_OUTPUT,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"); + fprintf(HELP_OUTPUT,"r {de/en}crypt ROT13/47\n"); + fprintf(HELP_OUTPUT,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"); + fprintf(HELP_OUTPUT,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n"); + fprintf(HELP_OUTPUT,"M[BQ] MIME encode [B:base64 Q:quoted]\n"); + fprintf(HELP_OUTPUT,"l ISO8859-1 (Latin-1) support\n"); + fprintf(HELP_OUTPUT,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"); + fprintf(HELP_OUTPUT,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"); + fprintf(HELP_OUTPUT," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"); + fprintf(HELP_OUTPUT," 4: JISX0208 Katakana to JISX0201 Katakana\n"); + fprintf(HELP_OUTPUT,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"); + fprintf(HELP_OUTPUT,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"); #ifdef MSDOS - fprintf(stderr,"T Text mode output\n"); -#endif - fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n"); - fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n"); - fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n"); - fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"); - fprintf(stderr,"\n"); - fprintf(stderr,"Long name options\n"); - fprintf(stderr," --ic= --oc=\n"); - fprintf(stderr," Specify the input or output codeset\n"); - fprintf(stderr," --fj --unix --mac --windows\n"); - fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n"); - fprintf(stderr," Convert for the system or code\n"); - fprintf(stderr," --hiragana --katakana --katakana-hiragana\n"); - fprintf(stderr," To Hiragana/Katakana Conversion\n"); - fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n"); + fprintf(HELP_OUTPUT,"T Text mode output\n"); +#endif + fprintf(HELP_OUTPUT,"O Output to File (DEFAULT 'nkf.out')\n"); + fprintf(HELP_OUTPUT,"I Convert non ISO-2022-JP charactor to GETA\n"); + fprintf(HELP_OUTPUT,"d,c Convert line breaks -d: LF -c: CRLF\n"); + fprintf(HELP_OUTPUT,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"); + fprintf(HELP_OUTPUT,"v, V Show this usage. V: show configuration\n"); + fprintf(HELP_OUTPUT,"\n"); + fprintf(HELP_OUTPUT,"Long name options\n"); + fprintf(HELP_OUTPUT," --ic= --oc=\n"); + fprintf(HELP_OUTPUT," Specify the input or output codeset\n"); + fprintf(HELP_OUTPUT," --fj --unix --mac --windows\n"); + fprintf(HELP_OUTPUT," --jis --euc --sjis --utf8 --utf16 --mime --base64\n"); + fprintf(HELP_OUTPUT," Convert for the system or code\n"); + fprintf(HELP_OUTPUT," --hiragana --katakana --katakana-hiragana\n"); + fprintf(HELP_OUTPUT," To Hiragana/Katakana Conversion\n"); + fprintf(HELP_OUTPUT," --prefix= Insert escape before troublesome characters of Shift_JIS\n"); #ifdef INPUT_OPTION - fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n"); + fprintf(HELP_OUTPUT," --cap-input, --url-input Convert hex after ':' or '%%'\n"); #endif #ifdef NUMCHAR_OPTION - fprintf(stderr," --numchar-input Convert Unicode Character Reference\n"); + fprintf(HELP_OUTPUT," --numchar-input Convert Unicode Character Reference\n"); #endif #ifdef UTF8_INPUT_ENABLE - fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n"); - fprintf(stderr," Specify how nkf handles unassigned characters\n"); + fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n"); + fprintf(HELP_OUTPUT," Specify how nkf handles unassigned characters\n"); #endif #ifdef OVERWRITE - fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"); - fprintf(stderr," Overwrite original listed files by filtered result\n"); - fprintf(stderr," --overwrite preserves timestamp of original files\n"); -#endif - fprintf(stderr," -g --guess Guess the input code\n"); - fprintf(stderr," --help --version Show this help/the version\n"); - fprintf(stderr," For more information, see also man nkf\n"); - fprintf(stderr,"\n"); + fprintf(HELP_OUTPUT," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"); + fprintf(HELP_OUTPUT," Overwrite original listed files by filtered result\n"); + fprintf(HELP_OUTPUT," --overwrite preserves timestamp of original files\n"); +#endif + fprintf(HELP_OUTPUT," -g --guess Guess the input code\n"); + fprintf(HELP_OUTPUT," --help --version Show this help/the version\n"); + fprintf(HELP_OUTPUT," For more information, see also man nkf\n"); + fprintf(HELP_OUTPUT,"\n"); version(); } -void version(void) +void show_configuration(void) { - fprintf(stderr,"Network Kanji Filter Version %s (%s) " -#if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__) && !defined(__OS2__) - "for DOS" + fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"); + fprintf(HELP_OUTPUT, " Compile-time options:\n"); + fprintf(HELP_OUTPUT, " Default output encoding: " +#if defined(DEFAULT_CODE_JIS) + "ISO-2022-JP" +#elif defined(DEFAULT_CODE_SJIS) + "Shift_JIS" +#elif defined(DEFAULT_CODE_EUC) + "EUC-JP" +#elif defined(DEFAULT_CODE_UTF8) + "UTF-8" +#endif + "\n"); + fprintf(HELP_OUTPUT, " Default output newline: " +#if DEFAULT_NEWLINE == CR + "CR" +#elif DEFAULT_NEWLINE == CRLF + "CRLF" +#else + "LF" #endif -#if defined(MSDOS) && defined(__WIN16__) - "for Win16" + "\n"); + fprintf(HELP_OUTPUT, " Decode MIME encoded string: " +#if MIME_DECODE_DEFAULT + "ON" +#else + "OFF" #endif -#if defined(MSDOS) && defined(__WIN32__) - "for Win32" + "\n"); + fprintf(HELP_OUTPUT, " Convert JIS X 0201 Katakana: " +#if X0201_DEFAULT + "ON" +#else + "OFF" #endif -#ifdef __OS2__ - "for OS/2" + "\n"); +fprintf(HELP_OUTPUT, " --help, --version output: " +#if HELP_OUTPUT_HELP_OUTPUT +"HELP_OUTPUT" +#else +"STDOUT" #endif - ,NKF_VERSION,NKF_RELEASE_DATE); - fprintf(stderr,"\n%s\n",CopyRight); +"\n"); +} + +void version(void) +{ + fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n"); } #endif /*PERL_XS*/