OSDN Git Service

* refine file structure.
[nkf/nkf.git] / nkf.c
diff --git a/nkf.c b/nkf.c
index 24d52ad..e265e78 100644 (file)
--- a/nkf.c
+++ b/nkf.c
  * 現在、nkf は SourceForge にてメンテナンスが続けられています。 (nkf is currently maintained on SourceForge.)
  * http://sourceforge.jp/projects/nkf/
 ***********************************************************************/
-/* $Id: nkf.c,v 1.158 2007/12/23 07:25:47 naruse Exp $ */
+#define NKF_IDENT "$Id: nkf.c,v 1.166 2008/01/23 09:10:25 naruse Exp $"
 #define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-12-22"
+#define NKF_RELEASE_DATE "2008-01-23"
 #define COPY_RIGHT \
     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
-    "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
+    "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
 
 #include "config.h"
+#include "nkf.h"
 #include "utf8tbl.h"
 
-#if defined(DEFAULT_CODE_JIS)
-#elif defined(DEFAULT_CODE_SJIS)
-#elif defined(DEFAULT_CODE_EUC)
-#elif defined(DEFAULT_CODE_UTF8)
-#else
-#define DEFAULT_CODE_JIS 1
-#endif
-
-#ifndef MIME_DECODE_DEFAULT
-#define MIME_DECODE_DEFAULT STRICT_MIME
-#endif
-#ifndef X0201_DEFAULT
-#define X0201_DEFAULT TRUE
-#endif
-
-#if DEFAULT_NEWLINE == 0x0D0A
-#define PUT_NEWLINE(func) do {\
-    func(0x0D);\
-    func(0x0A);\
-} while (0)
-#define OCONV_NEWLINE(func) do {\
-    func(0, 0x0D);\
-    func(0, 0x0A);\
-} while (0)
-#elif DEFAULT_NEWLINE == 0x0D
-#define PUT_NEWLINE(func) func(0x0D)
-#define OCONV_NEWLINE(func) func(0, 0x0D)
-#else
-#define DEFAULT_NEWLINE 0x0A
-#define PUT_NEWLINE(func) func(0x0A)
-#define OCONV_NEWLINE(func) func(0, 0x0A)
-#endif
-#ifdef HELP_OUTPUT_STDERR
-#define HELP_OUTPUT stderr
-#else
-#define HELP_OUTPUT stdout
-#endif
-
-#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
-#define MSDOS
-#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
-#define __WIN32__
-#endif
-#endif
-
-#ifdef PERL_XS
-#undef OVERWRITE
-#endif
-
-#ifndef PERL_XS
-#include <stdio.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#if defined(MSDOS) || defined(__OS2__)
-#include <fcntl.h>
-#include <io.h>
-#if defined(_MSC_VER) || defined(__WATCOMC__)
-#define mktemp _mktemp
-#endif
-#endif
-
-#ifdef MSDOS
-#ifdef LSI_C
-#define setbinmode(fp) fsetbin(fp)
-#elif defined(__DJGPP__)
-#include <libc/dosio.h>
-#define setbinmode(fp) djgpp_setbinmode(fp)
-#else /* Microsoft C, Turbo C */
-#define setbinmode(fp) setmode(fileno(fp), O_BINARY)
-#endif
-#else /* UNIX */
-#define setbinmode(fp)
-#endif
-
-#if defined(__DJGPP__)
-void  djgpp_setbinmode(FILE *fp)
-{
-    /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
-    int fd, m;
-    fd = fileno(fp);
-    m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
-    __file_handle_set(fd, m);
-}
-#endif
-
-#ifdef _IOFBF /* SysV and MSDOS, Windows */
-#define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
-#else /* BSD */
-#define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
-#endif
-
-/*Borland C++ 4.5 EasyWin*/
-#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
-#define         EASYWIN
-#ifndef __WIN16__
-#define __WIN16__
-#endif
-#include <windows.h>
-#endif
-
-#ifdef OVERWRITE
-/* added by satoru@isoternet.org */
-#if defined(__EMX__)
-#include <sys/types.h>
-#endif
-#include <sys/stat.h>
-#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
-#include <unistd.h>
-#if defined(__WATCOMC__)
-#include <sys/utime.h>
-#else
-#include <utime.h>
-#endif
-#else /* defined(MSDOS) */
-#ifdef __WIN32__
-#ifdef __BORLANDC__ /* BCC32 */
-#include <utime.h>
-#else /* !defined(__BORLANDC__) */
-#include <sys/utime.h>
-#endif /* (__BORLANDC__) */
-#else /* !defined(__WIN32__) */
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
-#include <sys/utime.h>
-#elif defined(__TURBOC__) /* BCC */
-#include <utime.h>
-#elif defined(LSI_C) /* LSI C */
-#endif /* (__WIN32__) */
-#endif
-#endif
-#endif
-
-#define         FALSE   0
-#define         TRUE    1
-
 /* state of output_mode and input_mode
 
    c2           0 means ASCII
@@ -188,28 +52,18 @@ void  djgpp_setbinmode(FILE *fp)
 
  */
 
-/* Input Assumption */
-
-#define         JIS_INPUT       4
-#define         EUC_INPUT      16
-#define         SJIS_INPUT      5
-#define         LATIN1_INPUT    6
-#define                UTF8_INPUT     13
-#define                UTF16_INPUT    1015
-#define                UTF32_INPUT    1017
+/* MIME ENCODE */
 
 #define         FIXED_MIME      7
 #define         STRICT_MIME     8
 
-/* MIME ENCODE */
-
-
 /* byte order */
-
-#define                ENDIAN_BIG      1234
-#define                ENDIAN_LITTLE   4321
-#define                ENDIAN_2143     2143
-#define                ENDIAN_3412     3412
+/* Byte-order selectors stored in input_endian / output_endian; values are 1..4. */
+enum byte_order {
+    ENDIAN_BIG = 1,
+    ENDIAN_LITTLE,     /* 2 */
+    ENDIAN_2143,       /* 3 */
+    ENDIAN_3412        /* 4 */
+};
 
 /* ASCII CODE */
 
@@ -265,11 +119,12 @@ enum nkf_encodings {
     UTF_32BE_BOM,
     UTF_32LE,
     UTF_32LE_BOM,
+    NKF_ENCODING_TABLE_SIZE,
     JIS_X_0201=0x1000,
-    JIS_X_0208,
-    JIS_X_0212,
-    JIS_X_0213_1,
-    JIS_X_0213_2,
+    JIS_X_0208=0x1001,
+    JIS_X_0212=0x1002,
+    JIS_X_0213_1=0x1003,
+    JIS_X_0213_2=0x1004,
     BINARY
 };
 
@@ -286,12 +141,12 @@ void w_oconv16(nkf_char c2, nkf_char c1);
 void w_oconv32(nkf_char c2, nkf_char c1);
 
 typedef struct {
-    char *name;
-    nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
-    void (*oconv_func)(nkf_char c2, nkf_char c1);
+    const char *name;  /* name of the native encoding, e.g. "Shift_JIS" */
+    nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);  /* input converter, e.g. s_iconv */
+    void (*oconv)(nkf_char c2, nkf_char c1);  /* output converter, e.g. s_oconv */
 } nkf_native_encoding;
 
-nkf_native_encoding NkfEncodingASCII =         { "US_ASCII", e_iconv, e_oconv };
+nkf_native_encoding NkfEncodingASCII =         { "ASCII", e_iconv, e_oconv };
 nkf_native_encoding NkfEncodingISO_2022_JP =   { "ISO-2022-JP", e_iconv, j_oconv };
 nkf_native_encoding NkfEncodingShift_JIS =     { "Shift_JIS", s_iconv, s_oconv };
 nkf_native_encoding NkfEncodingEUC_JP =                { "EUC-JP", e_iconv, e_oconv };
@@ -300,21 +155,22 @@ nkf_native_encoding NkfEncodingUTF_16 =           { "UTF-16", w_iconv16, w_oconv16 };
 nkf_native_encoding NkfEncodingUTF_32 =                { "UTF-32", w_iconv32, w_oconv32 };
 
 typedef struct {
-    int id;
-    char *name;
-    nkf_native_encoding *based_encoding;
+    const int id;  /* enum nkf_encodings value; the table terminator uses -1 */
+    const char *name;  /* encoding name as listed in nkf_encoding_table */
+    const nkf_native_encoding *base_encoding;  /* native encoding that supplies iconv/oconv */
 } nkf_encoding;
+
 nkf_encoding nkf_encoding_table[] = {
-    {ASCII,            "ASCII",                &NkfEncodingASCII},
+    {ASCII,            "US-ASCII",             &NkfEncodingASCII},
     {ISO_8859_1,       "ISO-8859-1",           &NkfEncodingASCII},
-    {ISO_2022_JP,      "ISO-2022-JP",          &NkfEncodingASCII},
+    {ISO_2022_JP,      "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
     {CP50220,          "CP50220",              &NkfEncodingISO_2022_JP},
     {CP50221,          "CP50221",              &NkfEncodingISO_2022_JP},
     {CP50222,          "CP50222",              &NkfEncodingISO_2022_JP},
     {ISO_2022_JP_1,    "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
     {ISO_2022_JP_3,    "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
     {SHIFT_JIS,                "Shift_JIS",            &NkfEncodingShift_JIS},
-    {WINDOWS_31J,      "WINDOWS-31J",          &NkfEncodingShift_JIS},
+    {WINDOWS_31J,      "Windows-31J",          &NkfEncodingShift_JIS},
     {CP10001,          "CP10001",              &NkfEncodingShift_JIS},
     {EUC_JP,           "EUC-JP",               &NkfEncodingEUC_JP},
     {CP51932,          "CP51932",              &NkfEncodingEUC_JP},
@@ -341,14 +197,15 @@ nkf_encoding nkf_encoding_table[] = {
     {BINARY,           "BINARY",               &NkfEncodingASCII},
     {-1,               NULL,                   NULL}
 };
-#define NKF_ENCODING_TABLE_SIZE 34
+
 struct {
     const char *name;
     const int id;
 } encoding_name_to_id_table[] = {
+    {"US-ASCII",               ASCII},
     {"ASCII",                  ASCII},
     {"ISO-2022-JP",            ISO_2022_JP},
-    {"X-ISO2022JP-CP932",      CP50220},
+    {"ISO2022JP-CP932",                CP50220},
     {"CP50220",                        CP50220},
     {"CP50221",                        CP50221},
     {"CP50222",                        CP50222},
@@ -391,14 +248,15 @@ struct {
     {"BINARY",                 BINARY},
     {NULL,                     -1}
 };
+
 #if defined(DEFAULT_CODE_JIS)
-#define            DEFAULT_ENCODING ISO_2022_JP
+#define            DEFAULT_ENCIDX ISO_2022_JP
 #elif defined(DEFAULT_CODE_SJIS)
-#define            DEFAULT_ENCODING SHIFT_JIS
+#define            DEFAULT_ENCIDX SHIFT_JIS
 #elif defined(DEFAULT_CODE_EUC)
-#define            DEFAULT_ENCODING EUC_JP
+#define            DEFAULT_ENCIDX EUC_JP
 #elif defined(DEFAULT_CODE_UTF8)
-#define            DEFAULT_ENCODING UTF_8
+#define            DEFAULT_ENCIDX UTF_8
 #endif
 
 
@@ -425,10 +283,6 @@ struct {
     ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
      && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
 
-#define CP932_TABLE_BEGIN 0xFA
-#define CP932_TABLE_END   0xFC
-#define CP932INV_TABLE_BEGIN 0xED
-#define CP932INV_TABLE_END   0xEE
 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
 
 #define         HOLD_SIZE       1024
@@ -441,23 +295,12 @@ struct {
 #define         DEFAULT_J       'B'
 #define         DEFAULT_R       'B'
 
-#define         SJ0162  0x00e1          /* 01 - 62 ku offset */
-#define         SJ6394  0x0161          /* 63 - 94 ku offset */
 
 #define         RANGE_NUM_MAX   18
 #define         GETA1   0x22
 #define         GETA2   0x2e
 
 
-#if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
-#define sizeof_euc_to_utf8_1byte 94
-#define sizeof_euc_to_utf8_2bytes 94
-#define sizeof_utf8_to_euc_C2 64
-#define sizeof_utf8_to_euc_E5B8 64
-#define sizeof_utf8_to_euc_2bytes 112
-#define sizeof_utf8_to_euc_3bytes 16
-#endif
-
 /* MIME preprocessor */
 
 #ifdef EASYWIN /*Easy Win */
@@ -476,7 +319,9 @@ struct input_code{
 };
 
 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
-static nkf_encoding *output_encoding;
+static nkf_encoding *input_encoding = NULL;
+static nkf_encoding *output_encoding = NULL;
+static void set_output_encoding(nkf_encoding *enc);
 
 #if !defined(PERL_XS) && !defined(WIN32DLL)
 static  nkf_char     noconvert(FILE *f);
@@ -568,7 +413,6 @@ static  void    eof_mime(void);
 static  void    mimeout_addchar(nkf_char c);
 #ifndef PERL_XS
 static  void    usage(void);
-static  void    version(void);
 static  void    show_configuration(void);
 #endif
 static  void    options(unsigned char *c);
@@ -601,7 +445,6 @@ static int             nop_f = FALSE;
 static int             binmode_f = TRUE;       /* binary mode */
 static int             rot_f = FALSE;          /* rot14/43 mode */
 static int             hira_f = FALSE;          /* hira/kata henkan */
-static int             input_f = FALSE;        /* non fixed input code  */
 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
@@ -634,11 +477,6 @@ static nkf_char url_getc(FILE *f);
 static nkf_char url_ungetc(nkf_char c,FILE *f);
 #endif
 
-#if defined(INT_IS_SHORT)
-#define NKF_INT32_C(n)   (n##L)
-#else
-#define NKF_INT32_C(n)   (n)
-#endif
 #define PREFIX_EUCG3   NKF_INT32_C(0x8F00)
 #define CLASS_MASK     NKF_INT32_C(0xFF000000)
 #define CLASS_UNICODE  NKF_INT32_C(0x01000000)
@@ -737,27 +575,9 @@ static unsigned char   ascii_intro = DEFAULT_R;
 
 static int             fold_margin  = FOLD_MARGIN;
 
-/* converters */
-
-#ifdef DEFAULT_CODE_JIS
-#   define  DEFAULT_CONV j_oconv
-#endif
-#ifdef DEFAULT_CODE_SJIS
-#   define  DEFAULT_CONV s_oconv
-#endif
-#ifdef DEFAULT_CODE_EUC
-#   define  DEFAULT_CONV e_oconv
-#endif
-#ifdef DEFAULT_CODE_UTF8
-#   define  DEFAULT_CONV w_oconv
-#endif
-
 /* process default */
-static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
-
-static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
-/* s_iconv or oconv */
 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
+static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
 
 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
@@ -910,13 +730,13 @@ char* nkf_strcpy(const char *str)
     return result;
 }
 
-static void nkf_str_upcase(const char *str, char *res, size_t length)
+/*
+ * Copy src into dest, upper-casing each character with nkf_toupper().
+ *
+ * length is the capacity of dest in bytes, terminating NUL included
+ * (callers pass a char[32] buffer together with 32).  At most length - 1
+ * characters are copied so the terminator always fits inside dest; longer
+ * input is truncated.  Nothing is written when length is 0.
+ */
+static void nkf_str_upcase(const char *src, char *dest, size_t length)
 {
-    int i = 0;
-    for (; i < length && str[i]; i++) {
-       res[i] = nkf_toupper(str[i]);
+    size_t i = 0;
+    if (length == 0) return;
+    /* stop one short of the capacity: the old bound let dest[length] be written */
+    for (; i + 1 < length && src[i]; i++) {
+       dest[i] = nkf_toupper(src[i]);
     }
-    res[i] = 0;
+    dest[i] = 0;
 }
 
 static nkf_encoding *nkf_enc_from_index(int idx)
@@ -930,6 +750,7 @@ static nkf_encoding *nkf_enc_from_index(int idx)
 static int nkf_enc_find_index(const char *name)
 {
     int i, index = -1;
+    if (*name == 'X' && *(name+1) == '-') name += 2;
     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
        if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
            return encoding_name_to_id_table[i].id;
@@ -948,7 +769,54 @@ static nkf_encoding *nkf_enc_find(const char *name)
 
+/* Field accessors for a pointer to nkf_encoding. */
 #define nkf_enc_name(enc) (enc)->name
 #define nkf_enc_to_index(enc) (enc)->id
-#define nkf_enc_to_base_encoding(enc) (enc)->based_encoding
+#define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
+/* Converter functions taken from the encoding's base (native) encoding. */
+#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
+#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
+/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
+#define nkf_enc_asciicompat(enc) (\
+    nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
+    nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
+/* True when the base encoding is NkfEncodingUTF_8, NkfEncodingUTF_16 or NkfEncodingUTF_32. */
+#define nkf_enc_unicode_p(enc) (\
+    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
+    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
+    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
+/* True when the encoding id is CP50220, CP50221 or CP50222. */
+#define nkf_enc_cp5022x_p(enc) (\
+    nkf_enc_to_index(enc) == CP50220 ||\
+    nkf_enc_to_index(enc) == CP50221 ||\
+    nkf_enc_to_index(enc) == CP50222)
+
+#ifndef DEFAULT_ENCIDX
+/*
+ * Return the name of the current locale's character encoding, or NULL
+ * when this build has no way to query it.
+ *
+ * With HAVE_LANGINFO_H the result comes from nl_langinfo(CODESET).  On
+ * __WIN32__ the result is "CP<n>" for the code page reported by GetACP(),
+ * formatted into a static buffer that the next call overwrites.
+ */
+static char* nkf_locale_charmap()
+{
+#ifdef HAVE_LANGINFO_H
+    return nl_langinfo(CODESET);
+#elif defined(__WIN32__)
+    /* "CP" + at most 10 decimal digits of an unsigned int + NUL */
+    static char buf[16];
+    sprintf(buf, "CP%u", (unsigned int)GetACP());
+    return buf;
+#else
+    return NULL;
+#endif
+}
+
+/*
+ * Look up the nkf_encoding that matches the locale's charmap name.
+ * Returns NULL when no charmap name is available; otherwise returns
+ * whatever nkf_enc_find() yields for that name.
+ */
+static nkf_encoding* nkf_locale_encoding()
+{
+    char *encname = nkf_locale_charmap();
+    if (!encname) return NULL;
+    /* the former "enc < 0" test compared a pointer with integer 0 and was dropped */
+    return nkf_enc_find(encname);
+}
+#endif
+
+/*
+ * Choose the encoding to use when none was requested: the build-time
+ * DEFAULT_ENCIDX when it is defined, otherwise the locale's encoding,
+ * falling back to ISO_2022_JP when the locale lookup yields a null pointer.
+ */
+static nkf_encoding* nkf_default_encoding()
+{
+#ifdef DEFAULT_ENCIDX
+    return nkf_enc_from_index(DEFAULT_ENCIDX);
+#else
+    nkf_encoding *enc = nkf_locale_encoding();
+    /* a pointer is tested for null, not ordered against 0 */
+    if (!enc) enc = nkf_enc_from_index(ISO_2022_JP);
+    return enc;
+#endif
+}
 
 #ifdef WIN32DLL
 #include "nkf32dll.c"
@@ -965,36 +833,11 @@ int main(int argc, char **argv)
 #ifdef EASYWIN /*Easy Win */
     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
 #endif
+    setlocale(LC_CTYPE, "");
 
     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
         cp = (unsigned char *)*argv;
         options(cp);
-        if (guess_f) {
-#ifdef CHECK_OPTION
-           int debug_f_back = debug_f;
-#endif
-#ifdef EXEC_IO
-           int exec_f_back = exec_f;
-#endif
-#ifdef X0212_ENABLE
-           int x0212_f_back = x0212_f;
-#endif
-           int x0213_f_back = x0213_f;
-           int guess_f_back = guess_f;
-           reinit();
-           guess_f = guess_f_back;
-           mime_f = FALSE;
-#ifdef CHECK_OPTION
-           debug_f = debug_f_back;
-#endif
-#ifdef EXEC_IO
-            exec_f = exec_f_back;
-#endif
-#ifdef X0212_ENABLE
-           x0212_f = x0212_f_back;
-#endif
-           x0213_f = x0213_f_back;
-       }
 #ifdef EXEC_IO
         if (exec_f){
             int fds[2], pid;
@@ -1024,6 +867,33 @@ int main(int argc, char **argv)
 #endif
     }
 
+    if (guess_f) {
+#ifdef CHECK_OPTION
+       int debug_f_back = debug_f;
+#endif
+#ifdef EXEC_IO
+       int exec_f_back = exec_f;
+#endif
+#ifdef X0212_ENABLE
+       int x0212_f_back = x0212_f;
+#endif
+       int x0213_f_back = x0213_f;
+       int guess_f_back = guess_f;
+       reinit();
+       guess_f = guess_f_back;
+       mime_f = FALSE;
+#ifdef CHECK_OPTION
+       debug_f = debug_f_back;
+#endif
+#ifdef EXEC_IO
+       exec_f = exec_f_back;
+#endif
+#ifdef X0212_ENABLE
+       x0212_f = x0212_f_back;
+#endif
+       x0213_f = x0213_f_back;
+    }
+
     if (binmode_f == TRUE)
 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
     if (freopen("","wb",stdout) == NULL)
@@ -1061,8 +931,7 @@ int main(int argc, char **argv)
            iconv_for_check = 0;
 #endif
           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
-              perror(*--argv);
-               *argv++;
+               perror(*(argv-1));
                is_argument_error = TRUE;
                continue;
           } else {
@@ -1295,7 +1164,7 @@ static const struct {
     {"katakana","h2"},
     {"katakana-hiragana","h3"},
     {"guess=", ""},
-    {"guess", "g1"},
+    {"guess", "g2"},
     {"cp932", ""},
     {"no-cp932", ""},
 #ifdef X0212_ENABLE
@@ -1349,354 +1218,309 @@ static const struct {
     {"prefix=", ""},
 };
 
-static int option_mode = 0;
-
-void options(unsigned char *cp)
+/*
+ * Adjust the global conversion flags for the chosen input encoding.
+ *
+ * Depending on nkf_enc_to_index(enc) this sets cp51932_f, ms_ucs_map_f,
+ * x0212_f, x0213_f, nfc_f and input_endian, each subject to the #ifdef
+ * that surrounds it.  Encoding ids without a case below leave every flag
+ * untouched.  enc itself is not stored by this function.
+ */
+static void set_input_encoding(nkf_encoding *enc)
 {
-    nkf_char i, j;
-    unsigned char *p;
-    unsigned char *cp_back = NULL;
-    char codeset[32];
-    nkf_encoding *enc;
-
-    if (option_mode==1)
-       return;
-    while(*cp && *cp++!='-');
-    while (*cp || cp_back) {
-       if(!*cp){
-           cp = cp_back;
-           cp_back = NULL;
-           continue;
-       }
-       p = 0;
-        switch (*cp++) {
-        case '-':  /* literal options */
-           if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
-               option_mode = 1;
-               return;
-           }
-            for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
-                p = (unsigned char *)long_option[i].name;
-                for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
-               if (*p == cp[j] || cp[j] == SP){
-                   p = &cp[j] + 1;
-                   break;
-               }
-               p = 0;
-            }
-           if (p == 0) {
-               fprintf(stderr, "unknown long option: --%s\n", cp);
-               return;
-           }
-           while(*cp && *cp != SP && cp++);
-            if (long_option[i].alias[0]){
-               cp_back = cp;
-               cp = (unsigned char *)long_option[i].alias;
-           }else{
-                if (strcmp(long_option[i].name, "ic=") == 0){
-                   nkf_str_upcase(p, codeset, 32);
-                   enc = nkf_enc_find(codeset);
-                   switch (nkf_enc_to_index(enc)) {
-                   case ISO_2022_JP:
-                       input_f = JIS_INPUT;
-                       break;
-                   case CP50220:
-                   case CP50221:
-                   case CP50222:
-                       input_f = JIS_INPUT;
+    switch (nkf_enc_to_index(enc)) {
+    case CP50220:
+    case CP50221:
+    case CP50222:
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = TRUE;
+       cp51932_f = TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case ISO_2022_JP_1:
-                       input_f = JIS_INPUT;
+       break;
+    case ISO_2022_JP_1:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
-                       break;
-                   case ISO_2022_JP_3:
-                       input_f = JIS_INPUT;
+       break;
+    case ISO_2022_JP_3:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
-                       x0213_f = TRUE;
-                       break;
-                   case SHIFT_JIS:
-                       input_f = SJIS_INPUT;
-                       break;
-                   case WINDOWS_31J:
-                       input_f = SJIS_INPUT;
+       x0213_f = TRUE;
+       break;
+    case SHIFT_JIS:
+       break;
+    case WINDOWS_31J:
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = TRUE;
+       cp51932_f = TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case CP10001:
-                       input_f = SJIS_INPUT;
+       break;
+    case EUC_JP:
+       break;
+    case CP10001:
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = TRUE;
+       cp51932_f = TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP10001;
+       ms_ucs_map_f = UCS_MAP_CP10001;
 #endif
-                       break;
-                   case EUC_JP:
-                       input_f = EUC_INPUT;
-                       break;
-                   case CP51932:
-                       input_f = EUC_INPUT;
+       break;
+    case CP51932:
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = TRUE;
+       cp51932_f = TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case EUCJP_MS:
-                       input_f = EUC_INPUT;
+       break;
+    case EUCJP_MS:
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = FALSE;
+       cp51932_f = FALSE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_MS;
+       ms_ucs_map_f = UCS_MAP_MS;
 #endif
-                       break;
-                   case EUCJP_ASCII:
-                       input_f = EUC_INPUT;
+       break;
+    case EUCJP_ASCII:
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = FALSE;
+       cp51932_f = FALSE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_ASCII;
+       ms_ucs_map_f = UCS_MAP_ASCII;
 #endif
-                       break;
-                   case SHIFT_JISX0213:
-                   case SHIFT_JIS_2004:
-                       input_f = SJIS_INPUT;
-                       x0213_f = TRUE;
+       break;
+    case SHIFT_JISX0213:
+    case SHIFT_JIS_2004:
+       x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = FALSE;
+       cp51932_f = FALSE;
 #endif
-                       break;
-                   case EUC_JISX0213:
-                   case EUC_JIS_2004:
-                       input_f = EUC_INPUT;
-                       x0213_f = TRUE;
+       break;
+    case EUC_JISX0213:
+    case EUC_JIS_2004:
+       x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
-                       cp51932_f = FALSE;
+       cp51932_f = FALSE;
 #endif
-                       break;
+       break;
 #ifdef UTF8_INPUT_ENABLE
-                   case UTF_8:
-                   case UTF_8N:
-                   case UTF_8_BOM:
-                       input_f = UTF8_INPUT;
-                       break;
 #ifdef UNICODE_NORMALIZATION
-                   case UTF8_MAC:
-                       input_f = UTF8_INPUT;
-                       nfc_f = TRUE;
-                       break;
+    case UTF8_MAC:
+       nfc_f = TRUE;
+       break;
 #endif
-                   case UTF_16:
-                   case UTF_16BE:
-                   case UTF_16BE_BOM:
-                       input_f = UTF16_INPUT;
-                       input_endian = ENDIAN_BIG;
-                       break;
-                   case UTF_16LE:
-                   case UTF_16LE_BOM:
-                       input_f = UTF16_INPUT;
-                       input_endian = ENDIAN_LITTLE;
-                       break;
-                   case UTF_32:
-                   case UTF_32BE:
-                   case UTF_32BE_BOM:
-                       input_f = UTF32_INPUT;
-                       input_endian = ENDIAN_BIG;
-                       break;
-                   case UTF_32LE:
-                   case UTF_32LE_BOM:
-                       input_f = UTF32_INPUT;
-                       input_endian = ENDIAN_LITTLE;
-                       break;
+    case UTF_16:
+    case UTF_16BE:
+    case UTF_16BE_BOM:
+       input_endian = ENDIAN_BIG;
+       break;
+    case UTF_16LE:
+    case UTF_16LE_BOM:
+       input_endian = ENDIAN_LITTLE;
+       break;
+    case UTF_32:
+    case UTF_32BE:
+    case UTF_32BE_BOM:
+       input_endian = ENDIAN_BIG;
+       break;
+    case UTF_32LE:
+    case UTF_32LE_BOM:
+       input_endian = ENDIAN_LITTLE;
+       break;
 #endif
-                   default:
-                       fprintf(stderr, "unknown input encoding: %s\n", codeset);
-                       break;
-                   }
-                    continue;
-               }
-                if (strcmp(long_option[i].name, "oc=") == 0){
-                   x0201_f = FALSE;
-                   nkf_str_upcase(p, codeset, 32);
-                   output_encoding = nkf_enc_find(codeset);
-                   switch (nkf_enc_to_index(output_encoding)) {
-                   case ISO_2022_JP:
-                       output_conv = j_oconv;
-                       break;
-                   case CP50220:
-                           output_conv = j_oconv;
-                           x0201_f = TRUE;
+    }
+}
+
+/*
+ * Adjust the global conversion flags for the chosen output encoding.
+ *
+ * Depending on nkf_enc_to_index(enc) this sets x0201_f, cp932inv_f,
+ * ms_ucs_map_f, x0212_f, x0213_f, output_bom_f and output_endian, each
+ * subject to the #ifdef that surrounds it.  Encoding ids without a case
+ * below leave every flag untouched.
+ *
+ * cp932inv_f is cleared only while it holds TRUE: the --cp932 and
+ * --cp932inv options store -TRUE, and that value is left alone here.
+ */
+static void set_output_encoding(nkf_encoding *enc)
+{
+    switch (nkf_enc_to_index(enc)) {
+    case CP50220:
+       x0201_f = TRUE;
 #ifdef SHIFTJIS_CP932
-                           cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                           ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case CP50221:
-                       output_conv = j_oconv;
+       break;
+    case CP50221:
 #ifdef SHIFTJIS_CP932
-                       cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case ISO_2022_JP_1:
-                       output_conv = j_oconv;
+       break;
+    case ISO_2022_JP_1:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
 #ifdef SHIFTJIS_CP932
-                       cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
-                       break;
-                   case ISO_2022_JP_3:
-                       output_conv = j_oconv;
+       break;
+    case ISO_2022_JP_3:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
-                       x0213_f = TRUE;
+       x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
-                       cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
-                       break;
-                   case SHIFT_JIS:
-                       output_conv = s_oconv;
-                       break;
-                   case WINDOWS_31J:
-                       output_conv = s_oconv;
+       break;
+    case SHIFT_JIS:
+       break;
+    case WINDOWS_31J:
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case CP10001:
-                       output_conv = s_oconv;
+       break;
+    case CP10001:
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP10001;
+       ms_ucs_map_f = UCS_MAP_CP10001;
 #endif
-                       break;
-                   case EUC_JP:
-                       output_conv = e_oconv;
-                       break;
-                   case CP51932:
-                       output_conv = e_oconv;
+       break;
+    case EUC_JP:
+       /* guarded like every other x0212_f access in this function */
+#ifdef X0212_ENABLE
+       x0212_f = TRUE;
+#endif
 #ifdef SHIFTJIS_CP932
-                       cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_CP932;
+       ms_ucs_map_f = UCS_MAP_CP932;
 #endif
-                       break;
-                   case EUCJP_MS:
-                       output_conv = e_oconv;
+       break;
+    case CP51932:
+#ifdef SHIFTJIS_CP932
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+       ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+       break;
+    case EUCJP_MS:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_MS;
+       ms_ucs_map_f = UCS_MAP_MS;
 #endif
-                       break;
-                   case EUCJP_ASCII:
-                       output_conv = e_oconv;
+       break;
+    case EUCJP_ASCII:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-                       ms_ucs_map_f = UCS_MAP_ASCII;
+       ms_ucs_map_f = UCS_MAP_ASCII;
 #endif
-                       break;
-                   case SHIFT_JISX0213:
-                   case SHIFT_JIS_2004:
-                           output_conv = s_oconv;
-                           x0213_f = TRUE;
+       break;
+    case SHIFT_JISX0213:
+    case SHIFT_JIS_2004:
+       x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
-                           cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
-                       break;
-                   case EUC_JISX0213:
-                   case EUC_JIS_2004:
-                       output_conv = e_oconv;
+       break;
+    case EUC_JISX0213:
+    case EUC_JIS_2004:
 #ifdef X0212_ENABLE
-                       x0212_f = TRUE;
+       x0212_f = TRUE;
 #endif
-                       x0213_f = TRUE;
+       x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
-                       cp932inv_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
-                       break;
+       break;
 #ifdef UTF8_OUTPUT_ENABLE
-                   case UTF_8:
-                   case UTF_8N:
-                       output_conv = w_oconv;
-                       break;
-                   case UTF_8_BOM:
-                       output_conv = w_oconv;
-                       output_bom_f = TRUE;
-                       break;
-                   case UTF_16BE:
-                       output_conv = w_oconv16;
-                       break;
-                   case UTF_16:
-                   case UTF_16BE_BOM:
-                       output_conv = w_oconv16;
-                       output_bom_f = TRUE;
-                       break;
-                   case UTF_16LE:
-                       output_conv = w_oconv16;
-                       output_endian = ENDIAN_LITTLE;
-                       break;
-                   case UTF_16LE_BOM:
-                       output_conv = w_oconv16;
-                       output_endian = ENDIAN_LITTLE;
-                       output_bom_f = TRUE;
-                       break;
-                   case UTF_32:
-                   case UTF_32BE:
-                       output_conv = w_oconv32;
-                       break;
-                   case UTF_32BE_BOM:
-                       output_conv = w_oconv32;
-                       output_bom_f = TRUE;
-                       break;
-                   case UTF_32LE:
-                       output_conv = w_oconv32;
-                       output_endian = ENDIAN_LITTLE;
-                       break;
-                   case UTF_32LE_BOM:
-                       output_conv = w_oconv32;
-                       output_endian = ENDIAN_LITTLE;
-                       output_bom_f = TRUE;
-                       break;
+    case UTF_8_BOM:
+       output_bom_f = TRUE;
+       break;
+    case UTF_16:
+    case UTF_16BE_BOM:
+       output_bom_f = TRUE;
+       break;
+    case UTF_16LE:
+       output_endian = ENDIAN_LITTLE;
+       output_bom_f = FALSE;
+       break;
+    case UTF_16LE_BOM:
+       output_endian = ENDIAN_LITTLE;
+       output_bom_f = TRUE;
+       break;
+    case UTF_32BE_BOM:
+       output_bom_f = TRUE;
+       break;
+    case UTF_32LE:
+       output_endian = ENDIAN_LITTLE;
+       output_bom_f = FALSE;
+       break;
+    case UTF_32LE_BOM:
+       output_endian = ENDIAN_LITTLE;
+       output_bom_f = TRUE;
+       break;
 #endif
-                   default:
-                       fprintf(stderr, "unknown output encoding: %s\n", codeset);
-                       break;
-                   }
+    }
+}
+
+static int option_mode = 0;
+
+void options(unsigned char *cp)
+{
+    nkf_char i, j;
+    unsigned char *p;
+    unsigned char *cp_back = NULL;
+    char codeset[32];
+    nkf_encoding *enc;
+
+    if (option_mode==1)
+       return;
+    while(*cp && *cp++!='-');
+    while (*cp || cp_back) {
+       if(!*cp){
+           cp = cp_back;
+           cp_back = NULL;
+           continue;
+       }
+       p = 0;
+        switch (*cp++) {
+        case '-':  /* literal options */
+           if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
+               option_mode = 1;
+               return;
+           }
+            for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
+                p = (unsigned char *)long_option[i].name;
+                for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
+               if (*p == cp[j] || cp[j] == SP){
+                   p = &cp[j] + 1;
+                   break;
+               }
+               p = 0;
+            }
+           if (p == 0) {
+               fprintf(stderr, "unknown long option: --%s\n", cp);
+               return;
+           }
+           while(*cp && *cp != SP && cp++);
+            if (long_option[i].alias[0]){
+               cp_back = cp;
+               cp = (unsigned char *)long_option[i].alias;
+           }else{
+                if (strcmp(long_option[i].name, "ic=") == 0){
+                   nkf_str_upcase((char *)p, codeset, 32);
+                   enc = nkf_enc_find(codeset);
+                   if (!enc) continue;
+                   input_encoding = enc;
+                    continue;
+               }
+                if (strcmp(long_option[i].name, "oc=") == 0){
+                   nkf_str_upcase((char *)p, codeset, 32);
+                   enc = nkf_enc_find(codeset);
+                   if (enc <= 0) continue;
+                   output_encoding = enc;
                     continue;
                }
                 if (strcmp(long_option[i].name, "guess=") == 0){
-                   if (p[0] == '1') {
-                       guess_f = 2;
-                   } else {
+                   if (p[0] == '0' || p[0] == '1') {
                        guess_f = 1;
+                   } else {
+                       guess_f = 2;
                    }
                     continue;
                 }
@@ -1761,7 +1585,7 @@ void options(unsigned char *cp)
                 if (strcmp(long_option[i].name, "cp932") == 0){
 #ifdef SHIFTJIS_CP932
                     cp51932_f = TRUE;
-                    cp932inv_f = TRUE;
+                    cp932inv_f = -TRUE;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
                     ms_ucs_map_f = UCS_MAP_CP932;
@@ -1780,7 +1604,7 @@ void options(unsigned char *cp)
                 }
 #ifdef SHIFTJIS_CP932
                 if (strcmp(long_option[i].name, "cp932inv") == 0){
-                    cp932inv_f = TRUE;
+                    cp932inv_f = -TRUE;
                     continue;
                 }
 #endif
@@ -1870,7 +1694,6 @@ void options(unsigned char *cp)
 #endif
 #ifdef UNICODE_NORMALIZATION
                if (strcmp(long_option[i].name, "utf8mac-input") == 0){
-                   input_f = UTF8_INPUT;
                    nfc_f = TRUE;
                    continue;
                }
@@ -1894,8 +1717,8 @@ void options(unsigned char *cp)
         case 't':           /* transparent mode */
             if (*cp=='1') {
                /* alias of -t */
+               cp++;
                nop_f = TRUE;
-               *cp++;
            } else if (*cp=='2') {
                /*
                 * -t with put/get
@@ -1903,28 +1726,24 @@ void options(unsigned char *cp)
                 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
                 *
                 */
+               cp++;
                nop_f = 2;
-               *cp++;
             } else
                nop_f = TRUE;
             continue;
         case 'j':           /* JIS output */
         case 'n':
-            output_conv = j_oconv;
             output_encoding = nkf_enc_from_index(ISO_2022_JP);
             continue;
         case 'e':           /* AT&T EUC output */
-            output_conv = e_oconv;
-            cp932inv_f = FALSE;
             output_encoding = nkf_enc_from_index(EUC_JP);
             continue;
         case 's':           /* SJIS output */
-            output_conv = s_oconv;
-            output_encoding = nkf_enc_from_index(SHIFT_JIS);
+            output_encoding = nkf_enc_from_index(WINDOWS_31J);
             continue;
         case 'l':           /* ISO8859 Latin-1 support, no conversion */
             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
-            input_f = LATIN1_INPUT;
+            input_encoding = nkf_enc_from_index(ISO_8859_1);
             continue;
         case 'i':           /* Kanji IN ESC-$-@/B */
             if (*cp=='@'||*cp=='B')
@@ -1965,7 +1784,7 @@ void options(unsigned char *cp)
 #ifdef UTF8_OUTPUT_ENABLE
         case 'w':           /* UTF-8 output */
             if (cp[0] == '8') {
-               output_conv = w_oconv; cp++;
+               cp++;
                if (cp[0] == '0'){
                    cp++;
                    output_encoding = nkf_enc_from_index(UTF_8N);
@@ -1976,13 +1795,12 @@ void options(unsigned char *cp)
            } else {
                int enc_idx;
                if ('1'== cp[0] && '6'==cp[1]) {
-                   output_conv = w_oconv16; cp+=2;
+                   cp += 2;
                    enc_idx = UTF_16;
                } else if ('3'== cp[0] && '2'==cp[1]) {
-                   output_conv = w_oconv32; cp+=2;
+                   cp += 2;
                    enc_idx = UTF_32;
                } else {
-                   output_conv = w_oconv;
                    output_encoding = nkf_enc_from_index(UTF_8);
                    continue;
                }
@@ -2014,18 +1832,19 @@ void options(unsigned char *cp)
         case 'W':           /* UTF input */
            if (cp[0] == '8') {
                cp++;
-               input_f = UTF8_INPUT;
+               input_encoding = nkf_enc_from_index(UTF_8);
            }else{
+               int enc_idx;
                if ('1'== cp[0] && '6'==cp[1]) {
                    cp += 2;
-                   input_f = UTF16_INPUT;
                    input_endian = ENDIAN_BIG;
+                   enc_idx = UTF_16;
                } else if ('3'== cp[0] && '2'==cp[1]) {
                    cp += 2;
-                   input_f = UTF32_INPUT;
                    input_endian = ENDIAN_BIG;
+                   enc_idx = UTF_32;
                } else {
-                   input_f = UTF8_INPUT;
+                   input_encoding = nkf_enc_from_index(UTF_8);
                    continue;
                }
                if (cp[0]=='L') {
@@ -2033,20 +1852,25 @@ void options(unsigned char *cp)
                    input_endian = ENDIAN_LITTLE;
                } else if (cp[0] == 'B') {
                    cp++;
+                   input_endian = ENDIAN_BIG;
                }
+               enc_idx = enc_idx == UTF_16 /* -W selects the INPUT encoding, so resolve LE/BE from input_endian set just above */
+                   ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
+                   : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
+               input_encoding = nkf_enc_from_index(enc_idx);
            }
             continue;
 #endif
         /* Input code assumption */
-        case 'J':   /* JIS input */
-            input_f = JIS_INPUT;
-            continue;
-        case 'E':   /* AT&T EUC input */
-            input_f = EUC_INPUT;
-            continue;
-        case 'S':   /* MS Kanji input */
-            input_f = SJIS_INPUT;
-            continue;
+       case 'J':   /* ISO-2022-JP input */
+           input_encoding = nkf_enc_from_index(ISO_2022_JP);
+           continue;
+       case 'E':   /* EUC-JP input */
+           input_encoding = nkf_enc_from_index(EUC_JP);
+           continue;
+       case 'S':   /* Windows-31J input */
+           input_encoding = nkf_enc_from_index(WINDOWS_31J);
+           continue;
         case 'Z':   /* Convert X0208 alphabet to asii */
             /* alpha_f
               bit:0   Convert JIS X 0208 Alphabet to ASCII
@@ -2108,6 +1932,8 @@ void options(unsigned char *cp)
             } else if (*cp=='0') {
                 mime_decode_f = FALSE;
                 mime_f = FALSE; cp++;
+            } else {
+                mime_f = STRICT_MIME;
             }
             continue;
         case 'M':   /* MIME output */
@@ -2158,10 +1984,10 @@ void options(unsigned char *cp)
             continue;
 #ifndef PERL_XS
         case 'g':
-            if (*cp == '1') {
+            if ('2' <= *cp && *cp <= '9') {
                 guess_f = 2;
                 cp++;
-            } else if (*cp == '0') {
+            } else if (*cp == '0' || *cp == '1') {
                guess_f = 1;
                 cp++;
             } else {
@@ -2198,7 +2024,7 @@ struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf
 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
 {
 #ifdef INPUT_CODE_FIX
-    if (f || !input_f)
+    if (f || !input_encoding)
 #endif
         if (estab_f != f){
             estab_f = f;
@@ -2206,7 +2032,7 @@ void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_ch
 
     if (iconv_func
 #ifdef INPUT_CODE_FIX
-        && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
+        && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
 #endif
         ){
         iconv = iconv_func;
@@ -2586,7 +2412,12 @@ nkf_char noconvert(FILE *f)
 
 void module_connection(void)
 {
-    oconv = output_conv;
+    if (input_encoding) set_input_encoding(input_encoding);
+    if (!output_encoding) {
+       output_encoding = nkf_default_encoding();
+    }
+    set_output_encoding(output_encoding);
+    oconv = nkf_enc_to_oconv(output_encoding);
     o_putc = std_putc;
 
     /* replace continucation module, from output side */
@@ -2646,7 +2477,7 @@ void module_connection(void)
     }
 #endif
 #ifdef UNICODE_NORMALIZATION
-    if (nfc_f && input_f == UTF8_INPUT){
+    if (nfc_f){
         i_nfc_getc = i_getc; i_getc = nfc_getc;
         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
     }
@@ -2659,18 +2490,8 @@ void module_connection(void)
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
     }
-    if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
-        set_iconv(-TRUE, e_iconv);
-    } else if (input_f == SJIS_INPUT) {
-        set_iconv(-TRUE, s_iconv);
-#ifdef UTF8_INPUT_ENABLE
-    } else if (input_f == UTF8_INPUT) {
-        set_iconv(-TRUE, w_iconv);
-    } else if (input_f == UTF16_INPUT) {
-        set_iconv(-TRUE, w_iconv16);
-    } else if (input_f == UTF32_INPUT) {
-        set_iconv(-TRUE, w_iconv32);
-#endif
+    if (input_encoding) {
+        set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
     } else {
         set_iconv(FALSE, e_iconv);
     }
@@ -2694,7 +2515,7 @@ void check_bom(FILE *f)
        if((c2 = (*i_getc)(f)) == 0x00){
            if((c2 = (*i_getc)(f)) == 0xFE){
                if((c2 = (*i_getc)(f)) == 0xFF){
-                   if(!input_f){
+                   if(!input_encoding){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
@@ -2706,7 +2527,7 @@ void check_bom(FILE *f)
                (*i_ungetc)(0xFE,f);
            }else if(c2 == 0xFF){
                if((c2 = (*i_getc)(f)) == 0xFE){
-                   if(!input_f){
+                   if(!input_encoding){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
@@ -2724,7 +2545,7 @@ void check_bom(FILE *f)
     case 0xEF:
        if((c2 = (*i_getc)(f)) == 0xBB){
            if((c2 = (*i_getc)(f)) == 0xBF){
-               if(!input_f){
+               if(!input_encoding){
                    set_iconv(TRUE, w_iconv);
                }
                if (iconv == w_iconv) {
@@ -2740,7 +2561,7 @@ void check_bom(FILE *f)
        if((c2 = (*i_getc)(f)) == 0xFF){
            if((c2 = (*i_getc)(f)) == 0x00){
                if((c2 = (*i_getc)(f)) == 0x00){
-                   if(!input_f){
+                   if(!input_encoding){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
@@ -2751,7 +2572,7 @@ void check_bom(FILE *f)
                }else (*i_ungetc)(c2,f);
                (*i_ungetc)(0x00,f);
            }else (*i_ungetc)(c2,f);
-           if(!input_f){
+           if(!input_encoding){
                set_iconv(TRUE, w_iconv16);
            }
            if (iconv == w_iconv16) {
@@ -2766,7 +2587,7 @@ void check_bom(FILE *f)
        if((c2 = (*i_getc)(f)) == 0xFE){
            if((c2 = (*i_getc)(f)) == 0x00){
                if((c2 = (*i_getc)(f)) == 0x00){
-                   if(!input_f){
+                   if(!input_encoding){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
@@ -2777,7 +2598,7 @@ void check_bom(FILE *f)
                }else (*i_ungetc)(c2,f);
                (*i_ungetc)(0x00,f);
            }else (*i_ungetc)(c2,f);
-           if(!input_f){
+           if(!input_encoding){
                set_iconv(TRUE, w_iconv16);
            }
            if (iconv == w_iconv16) {
@@ -2803,11 +2624,7 @@ nkf_char kanji_convert(FILE *f)
     nkf_char    c3, c2=0, c1, c0=0;
     int is_8bit = FALSE;
 
-    if(input_f == SJIS_INPUT || input_f == EUC_INPUT
-#ifdef UTF8_INPUT_ENABLE
-       || input_f == UTF8_INPUT || input_f == UTF16_INPUT
-#endif
-      ){
+    if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
        is_8bit = TRUE;
     }
 
@@ -2824,12 +2641,12 @@ nkf_char kanji_convert(FILE *f)
 
     while ((c1 = (*i_getc)(f)) != EOF) {
 #ifdef INPUT_CODE_FIX
-       if (!input_f)
+       if (!input_encoding)
 #endif
            code_status(c1);
         if (c2) {
             /* second byte */
-            if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
+            if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
                 /* in case of 8th bit is on */
                 if (!estab_f&&!mime_decode_mode) {
                     /* in case of not established yet */
@@ -2918,7 +2735,7 @@ nkf_char kanji_convert(FILE *f)
                 SEND;
            } else
 #endif
-           if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
+           if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
                 /* 8 bit code */
                 if (!estab_f && !iso8859_f) {
                     /* not established yet */
@@ -3458,6 +3275,8 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
                if (0x9E < c1) c2++;
            }
        }else{
+#define         SJ0162  0x00e1          /* 01 - 62 ku offset */
+#define         SJ6394  0x0161          /* 63 - 94 ku offset */
            c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
            if (0x9E < c1) c2++;
        }
@@ -4408,7 +4227,7 @@ void s_oconv(nkf_char c2, nkf_char c1)
            if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
                /* CP932 UDC */
                c1 &= 0xFFF;
-               c2 = c1 / 188 + 0xF0;
+               c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
                c1 = c1 % 188;
                c1 += 0x40 + (c1 > 0x3e);
                (*o_putc)(c2);
@@ -5017,7 +4836,7 @@ void hira_conv(nkf_char c2, nkf_char c1)
                 c2 = 0x24;
                 (*o_hira_conv)(c2,c1);
                 return;
-            } else if (c1 == 0x74 && (output_conv == w_oconv || output_conv == w_oconv16)) {
+            } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
                 c2 = 0;
                 c1 = CLASS_UNICODE | 0x3094;
                 (*o_hira_conv)(c2,c1);
@@ -5471,7 +5290,7 @@ nkf_char nfc_getc(FILE *f)
     nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
     int i=0, j, k=1, lower, upper;
     nkf_char buf[9];
-    const nkf_nfchar *array;
+    const unsigned char *array;
 
     buf[i] = (*g)(f);
     while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
@@ -6250,7 +6069,6 @@ void reinit(void)
     binmode_f = TRUE;
     rot_f = FALSE;
     hira_f = FALSE;
-    input_f = FALSE;
     alpha_f = FALSE;
     mime_f = MIME_DECODE_DEFAULT;
     mime_decode_f = FALSE;
@@ -6316,8 +6134,6 @@ void reinit(void)
     kanji_intro = DEFAULT_J;
     ascii_intro = DEFAULT_R;
     fold_margin  = FOLD_MARGIN;
-    output_conv = DEFAULT_CONV;
-    oconv = DEFAULT_CONV;
     o_zconv = no_connection;
     o_fconv = no_connection;
     o_nlconv = no_connection;
@@ -6351,7 +6167,8 @@ void reinit(void)
     iconv_for_check = 0;
 #endif
     input_codename = NULL;
-    output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
+    input_encoding = NULL;
+    output_encoding = NULL;
 #ifdef WIN32DLL
     reinitdll();
 #endif /*WIN32DLL*/
@@ -6373,100 +6190,100 @@ nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
 #ifdef WIN32DLL
 #define fprintf dllprintf
 #endif
+
+void version(void)
+{
+    fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
+}
+
 void usage(void)
 {
-    fprintf(HELP_OUTPUT,"USAGE:  nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
-    fprintf(HELP_OUTPUT,"Flags:\n");
-    fprintf(HELP_OUTPUT,"b,u      Output is buffered (DEFAULT),Output is unbuffered\n");
-#ifdef DEFAULT_CODE_SJIS
-    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
-#endif
-#ifdef DEFAULT_CODE_JIS
-    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
-#endif
-#ifdef DEFAULT_CODE_EUC
-    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
-#endif
-#ifdef DEFAULT_CODE_UTF8
-    fprintf(HELP_OUTPUT,"j,s,e,w  Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
-#endif
+    fprintf(HELP_OUTPUT,
+           "USAGE:  nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
+           "Flags:\n"
+           "b,u      Output is buffered (DEFAULT),Output is unbuffered\n"
+           "j,s,e,w  Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
 #ifdef UTF8_OUTPUT_ENABLE
-    fprintf(HELP_OUTPUT,"         After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
+           "         After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
 #endif
-    fprintf(HELP_OUTPUT,"J,S,E,W  Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
+           "J,S,E,W  Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
 #ifdef UTF8_INPUT_ENABLE
-    fprintf(HELP_OUTPUT,"         After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
-#endif
-    fprintf(HELP_OUTPUT,"t        no conversion\n");
-    fprintf(HELP_OUTPUT,"i[@B]    Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
-    fprintf(HELP_OUTPUT,"o[BJH]   Specify the Esc Seq for ASCII/Roman        (DEFAULT B)\n");
-    fprintf(HELP_OUTPUT,"r        {de/en}crypt ROT13/47\n");
-    fprintf(HELP_OUTPUT,"h        1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
-    fprintf(HELP_OUTPUT,"m[BQN0]  MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
-    fprintf(HELP_OUTPUT,"M[BQ]    MIME encode [B:base64 Q:quoted]\n");
-    fprintf(HELP_OUTPUT,"l        ISO8859-1 (Latin-1) support\n");
-    fprintf(HELP_OUTPUT,"f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
-    fprintf(HELP_OUTPUT,"Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n");
-    fprintf(HELP_OUTPUT,"         1: Kankaku to one space  2: to two spaces  3: HTML Entity\n");
-    fprintf(HELP_OUTPUT,"         4: JISX0208 Katakana to JISX0201 Katakana\n");
-    fprintf(HELP_OUTPUT,"X,x      Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
-    fprintf(HELP_OUTPUT,"B[0-2]   Broken input  0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
+           "         After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
+#endif
+           "t        no conversion\n"
+           "i[@B]    Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
+           "o[BJH]   Specify the Esc Seq for ASCII/Roman        (DEFAULT B)\n"
+           "r        {de/en}crypt ROT13/47\n"
+           "h        1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
+           "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
+           "M[BQ]    MIME encode [B:base64 Q:quoted]\n"
+           "l        ISO8859-1 (Latin-1) support\n"
+           "f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
+           "Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
+           "         1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
+           "         4: JISX0208 Katakana to JISX0201 Katakana\n"
+           "X,x      Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
+           "B[0-2]   Broken input  0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
 #ifdef MSDOS
-    fprintf(HELP_OUTPUT,"T        Text mode output\n");
-#endif
-    fprintf(HELP_OUTPUT,"O        Output to File (DEFAULT 'nkf.out')\n");
-    fprintf(HELP_OUTPUT,"I        Convert non ISO-2022-JP charactor to GETA\n");
-    fprintf(HELP_OUTPUT,"d,c      Convert line breaks  -d: LF  -c: CRLF\n");
-    fprintf(HELP_OUTPUT,"-L[uwm]  line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
-    fprintf(HELP_OUTPUT,"v, V     Show this usage. V: show configuration\n");
-    fprintf(HELP_OUTPUT,"\n");
-    fprintf(HELP_OUTPUT,"Long name options\n");
-    fprintf(HELP_OUTPUT," --ic=<input codeset>  --oc=<output codeset>\n");
-    fprintf(HELP_OUTPUT,"                   Specify the input or output codeset\n");
-    fprintf(HELP_OUTPUT," --fj  --unix --mac  --windows\n");
-    fprintf(HELP_OUTPUT," --jis  --euc  --sjis  --utf8  --utf16  --mime  --base64\n");
-    fprintf(HELP_OUTPUT,"                   Convert for the system or code\n");
-    fprintf(HELP_OUTPUT," --hiragana  --katakana  --katakana-hiragana\n");
-    fprintf(HELP_OUTPUT,"                   To Hiragana/Katakana Conversion\n");
-    fprintf(HELP_OUTPUT," --prefix=         Insert escape before troublesome characters of Shift_JIS\n");
+           "T        Text mode output\n"
+#endif
+           "O        Output to File (DEFAULT 'nkf.out')\n"
+           "I        Convert non ISO-2022-JP charactor to GETA\n"
+           "d,c      Convert line breaks  -d: LF  -c: CRLF\n"
+           "-L[uwm]  line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
+           "v, V     Show this usage. V: show configuration\n"
+           "\n"
+           "Long name options\n"
+           " --ic=<input codeset>  --oc=<output codeset>\n"
+           "                   Specify the input or output codeset\n"
+           " --fj  --unix --mac  --windows\n"
+           " --jis  --euc  --sjis  --utf8  --utf16  --mime  --base64\n"
+           "                   Convert for the system or code\n"
+           " --hiragana  --katakana  --katakana-hiragana\n"
+           "                   To Hiragana/Katakana Conversion\n"
+           " --prefix=         Insert escape before troublesome characters of Shift_JIS\n"
 #ifdef INPUT_OPTION
-    fprintf(HELP_OUTPUT," --cap-input, --url-input  Convert hex after ':' or '%%'\n");
+           " --cap-input, --url-input  Convert hex after ':' or '%%'\n"
 #endif
 #ifdef NUMCHAR_OPTION
-    fprintf(HELP_OUTPUT," --numchar-input   Convert Unicode Character Reference\n");
+           " --numchar-input   Convert Unicode Character Reference\n"
 #endif
 #ifdef UTF8_INPUT_ENABLE
-    fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n");
-    fprintf(HELP_OUTPUT,"                   Specify how nkf handles unassigned characters\n");
+           " --fb-{skip, html, xml, perl, java, subchar}\n"
+           "                   Specify how nkf handles unassigned characters\n"
 #endif
 #ifdef OVERWRITE
-    fprintf(HELP_OUTPUT," --in-place[=SUFFIX]  --overwrite[=SUFFIX]\n");
-    fprintf(HELP_OUTPUT,"                   Overwrite original listed files by filtered result\n");
-    fprintf(HELP_OUTPUT,"                   --overwrite preserves timestamp of original files\n");
-#endif
-    fprintf(HELP_OUTPUT," -g  --guess       Guess the input code\n");
-    fprintf(HELP_OUTPUT," --help  --version Show this help/the version\n");
-    fprintf(HELP_OUTPUT,"                   For more information, see also man nkf\n");
-    fprintf(HELP_OUTPUT,"\n");
+           " --in-place[=SUFFIX]  --overwrite[=SUFFIX]\n"
+           "                   Overwrite original listed files by filtered result\n"
+           "                   --overwrite preserves timestamp of original files\n"
+#endif
+           " -g  --guess       Guess the input code\n"
+           " --help  --version Show this help/the version\n"
+           "                   For more information, see also man nkf\n"
+           "\n");
     version();
 }
 
 void show_configuration(void)
 {
-    fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
-    fprintf(HELP_OUTPUT, "  Compile-time options:\n");
-    fprintf(HELP_OUTPUT, "    Default output encoding:     "
-#if defined(DEFAULT_CODE_JIS)
-           "ISO-2022-JP"
-#elif defined(DEFAULT_CODE_SJIS)
-           "Shift_JIS"
-#elif defined(DEFAULT_CODE_EUC)
-           "EUC-JP"
-#elif defined(DEFAULT_CODE_UTF8)
-           "UTF-8"
+    fprintf(HELP_OUTPUT,
+           "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
+           "  nkf identity:\n"
+           "    " NKF_IDENT "\n"
+           "  Compile-time options:\n"
+           "    Compiled at:                 " __DATE__ " " __TIME__ "\n"
+          );
+    fprintf(HELP_OUTPUT,
+           "    Default output encoding:     "
+#ifdef DEFAULT_ENCIDX
+           "%s\n", nkf_enc_name(nkf_default_encoding())
+#else
+           "%s (%s)\n", nkf_locale_encoding() ? "LOCALE" : "DEFAULT",
+           nkf_enc_name(nkf_default_encoding())
 #endif
-           "\n");
-    fprintf(HELP_OUTPUT, "    Default output newline:      "
+          );
+    fprintf(HELP_OUTPUT,
+           "    Default output newline:      "
 #if DEFAULT_NEWLINE == CR
            "CR"
 #elif DEFAULT_NEWLINE == CRLF
@@ -6474,32 +6291,27 @@ void show_configuration(void)
 #else
            "LF"
 #endif
-           "\n");
-    fprintf(HELP_OUTPUT, "    Decode MIME encoded string:  "
+           "\n"
+           "    Decode MIME encoded string:  "
 #if MIME_DECODE_DEFAULT
            "ON"
 #else
            "OFF"
 #endif
-           "\n");
-    fprintf(HELP_OUTPUT, "    Convert JIS X 0201 Katakana: "
+           "\n"
+           "    Convert JIS X 0201 Katakana: "
 #if X0201_DEFAULT
            "ON"
 #else
            "OFF"
 #endif
-           "\n");
-fprintf(HELP_OUTPUT, " --help, --version output: "
+           "\n"
+           "    --help, --version output:    "
 #if HELP_OUTPUT_HELP_OUTPUT
-"HELP_OUTPUT"
+           "HELP_OUTPUT"
 #else
-"STDOUT"
+           "STDOUT"
 #endif
-"\n");
-}
-
-void version(void)
-{
-    fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
+           "\n");
 }
 #endif /*PERL_XS*/