OSDN Git Service

Fix minor typos
[nkf/nkf.git] / nkf.c
diff --git a/nkf.c b/nkf.c
index b9a38d9..cc438a5 100644 (file)
--- a/nkf.c
+++ b/nkf.c
@@ -1,41 +1,30 @@
-/** Network Kanji Filter. (PDS Version)
- ** -*- coding: ISO-2022-JP -*-
- ************************************************************************
- ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
- ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
- ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
- ** Copyright (C) 1996,1998
- ** Copyright (C) 2002
- ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
- ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
- ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
- ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
- **
- **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
- **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
- **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
- **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
- **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
- **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
- **
- **    Everyone is permitted to do anything on this program
- **    including copying, modifying, improving,
- **    as long as you don't try to pretend that you wrote it.
- **    i.e., the above copyright notice has to appear in all copies.
- **    Binary distribution requires original version messages.
- **    You don't have to ask before copying, redistribution or publishing.
- **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
- ***********************************************************************/
-
-/***********************************************************************
- * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SorceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
- * http://sourceforge.jp/projects/nkf/
- ***********************************************************************/
-#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2009-01-05"
+/*
+ * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
+ * Copyright (c) 1996-2018, The nkf Project.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#define NKF_VERSION "2.1.5"
+#define NKF_RELEASE_DATE "2018-12-15"
 #define COPY_RIGHT \
-    "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
-    "Copyright (C) 2002-2009 Kono, Furukawa, Naruse, mastodon"
+    "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
+    "Copyright (C) 1996-2018, The nkf Project."
 
 #include "config.h"
 #include "nkf.h"
@@ -221,6 +210,8 @@ struct {
 } encoding_name_to_id_table[] = {
     {"US-ASCII",               ASCII},
     {"ASCII",                  ASCII},
+    {"646",                    ASCII},
+    {"ROMAN8",                 ASCII},
     {"ISO-2022-JP",            ISO_2022_JP},
     {"ISO2022JP-CP932",                CP50220},
     {"CP50220",                        CP50220},
@@ -232,6 +223,8 @@ struct {
     {"ISO-2022-JP-2004",       ISO_2022_JP_2004},
     {"SHIFT_JIS",              SHIFT_JIS},
     {"SJIS",                   SHIFT_JIS},
+    {"MS_Kanji",               SHIFT_JIS},
+    {"PCK",                    SHIFT_JIS},
     {"WINDOWS-31J",            WINDOWS_31J},
     {"CSWINDOWS31J",           WINDOWS_31J},
     {"CP932",                  WINDOWS_31J},
@@ -306,7 +299,7 @@ struct {
                               && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
 
 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
-#define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c < (0xE0&0x7F))
+#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F) /* true for SP <= c <= 0x5F; c is parenthesized but still evaluated twice */
 
 #define         HOLD_SIZE       1024
 #if defined(INT_IS_SHORT)
@@ -363,6 +356,7 @@ static  int     no_cp932ext_f = FALSE;
 /* ignore ZERO WIDTH NO-BREAK SPACE */
 static  int     no_best_fit_chars_f = FALSE;
 static  int     input_endian = ENDIAN_BIG;
+static  int     input_bom_f = FALSE;
 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
 static  void    (*encode_fallback)(nkf_char c) = NULL;
 static  void    w_status(struct input_code *, nkf_char);
@@ -390,6 +384,8 @@ static unsigned char   stdibuf[IOBUF_SIZE];
 static unsigned char   stdobuf[IOBUF_SIZE];
 #endif
 
+#define NKF_UNSPECIFIED (-TRUE)
+
 /* flags */
 static int             unbuf_f = FALSE;
 static int             estab_f = FALSE;
@@ -404,7 +400,7 @@ static int             mimebuf_f = FALSE;      /* MIME buffered input */
 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
 static int             iso8859_f = FALSE;      /* ISO8859 through */
 static int             mimeout_f = FALSE;       /* base64 mode */
-static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
+static int             x0201_f = NKF_UNSPECIFIED;   /* convert JIS X 0201 */
 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
 
 #ifdef UNICODE_NORMALIZATION
@@ -435,6 +431,8 @@ static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
 
+#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
+
 #ifdef NUMCHAR_OPTION
 static int numchar_f = FALSE;
 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
@@ -479,8 +477,10 @@ struct input_code input_code_list[] = {
     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
 #ifdef UTF8_INPUT_ENABLE
     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
+    {"UTF-16",     0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
+    {"UTF-32",     0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
 #endif
-    {0}
+    {NULL,        0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
 };
 
 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
@@ -509,10 +509,10 @@ static int             fold_margin  = FOLD_MARGIN;
 /* process default */
 
 static nkf_char
-no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
+no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0) /* reports the failure on stderr and exits; arguments are ignored */
 {
     fprintf(stderr,"nkf internal module connection failure.\n");
-    exit(1);
+    exit(EXIT_FAILURE); /* does not return; the return below is only there for lint */
     return 0; /* LINT */
 }
 
@@ -623,6 +623,27 @@ static const unsigned char ev[]= {
     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
     0x00,0x00};
 
+/* X0201 kana to X0213 conversion table for handakuten (semi-voiced sound mark) */
+/* 90-9F A0-DF; only eight byte pairs are set (0x25,0x77 .. 0x25,0x7e), all others are 0x00 (presumably "no composed form" - confirm at the use site) */
+static const unsigned char ev_x0213[]= {
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
+    0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
+    0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+    0x00,0x00};
+
 
 /* X0208 kigou conversion table */
 /* 0x8140 - 0x819e */
@@ -660,10 +681,6 @@ static nkf_char prev_cr = 0; /* CR or 0 */
 static int             end_check;
 #endif /*Easy Win */
 
-#define STD_GC_BUFSIZE (256)
-nkf_char std_gc_buf[STD_GC_BUFSIZE];
-nkf_char std_gc_ndx;
-
 static void *
 nkf_xmalloc(size_t size)
 {
@@ -757,7 +774,7 @@ nkf_enc_find(const char *name)
 
 #ifdef DEFAULT_CODE_LOCALE
 static const char*
-nkf_locale_charmap()
+nkf_locale_charmap(void)
 {
 #ifdef HAVE_LANGINFO_H
     return nl_langinfo(CODESET);
@@ -785,7 +802,7 @@ nkf_locale_charmap()
 }
 
 static nkf_encoding*
-nkf_locale_encoding()
+nkf_locale_encoding(void)
 {
     nkf_encoding *enc = 0;
     const char *encname = nkf_locale_charmap();
@@ -796,13 +813,13 @@ nkf_locale_encoding()
 #endif /* DEFAULT_CODE_LOCALE */
 
 static nkf_encoding*
-nkf_utf8_encoding()
+nkf_utf8_encoding(void)
 {
     return &nkf_encoding_table[UTF_8];
 }
 
 static nkf_encoding*
-nkf_default_encoding()
+nkf_default_encoding(void)
 {
     nkf_encoding *enc = 0;
 #ifdef DEFAULT_CODE_LOCALE
@@ -817,30 +834,32 @@ nkf_default_encoding()
 typedef struct {
     long capa;
     long len;
-    unsigned char *ptr;
+    nkf_char *ptr;
 } nkf_buf_t;
 
 static nkf_buf_t *
 nkf_buf_new(int length)
 {
     nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
-    buf->ptr = nkf_xmalloc(length);
+    buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
     buf->capa = length;
     buf->len = 0;
     return buf;
-} 
+}
 
+#if 0
 static void
 nkf_buf_dispose(nkf_buf_t *buf)
 {
     nkf_xfree(buf->ptr);
     nkf_xfree(buf);
 }
+#endif
 
 #define nkf_buf_length(buf) ((buf)->len)
 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
 
-static unsigned char
+static nkf_char
 nkf_buf_at(nkf_buf_t *buf, int index)
 {
     assert(index <= buf->len);
@@ -850,17 +869,19 @@ nkf_buf_at(nkf_buf_t *buf, int index)
 static void
 nkf_buf_clear(nkf_buf_t *buf)
 {
-    buf->ptr = 0;
+    buf->len = 0; /* empty the buffer by resetting its length; ptr and capa are kept (the removed line zeroed ptr instead) */
 }
 
 static void
-nkf_buf_push(nkf_buf_t *buf, unsigned char c)
+nkf_buf_push(nkf_buf_t *buf, nkf_char c) /* appends c at index len and increments len */
 {
-    assert(buf->capa > buf->len);
+    if (buf->capa <= buf->len) { /* buffer full: explicit check replaces the former assert */
+       exit(EXIT_FAILURE); /* terminates without printing a diagnostic */
+    }
     buf->ptr[buf->len++] = c;
 }
 
-static unsigned char
+static nkf_char
 nkf_buf_pop(nkf_buf_t *buf)
 {
     assert(!nkf_buf_empty_p(buf));
@@ -883,79 +904,61 @@ static void
 usage(void)
 {
     fprintf(HELP_OUTPUT,
-           "USAGE:  nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
-           "Flags:\n"
-           "b,u      Output is buffered (DEFAULT),Output is unbuffered\n"
-           "j,s,e,w  Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
+           "Usage:  nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
 #ifdef UTF8_OUTPUT_ENABLE
-           "         After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
+           " j/s/e/w  Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
+           "          UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
+#else
 #endif
-           "J,S,E,W  Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
 #ifdef UTF8_INPUT_ENABLE
-           "         After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
+           " J/S/E/W  Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
+           "          UTF option is -W[8,[16,32][B,L]]\n"
+#else
+           " J/S/E    Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
 #endif
-           "t        no conversion\n"
            );
     fprintf(HELP_OUTPUT,
-           "i[@B]    Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
-           "o[BJH]   Specify the Esc Seq for ASCII/Roman        (DEFAULT B)\n"
-           "r        {de/en}crypt ROT13/47\n"
-           "h        1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
-           "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
-           "M[BQ]    MIME encode [B:base64 Q:quoted]\n"
-           "l        ISO8859-1 (Latin-1) support\n"
-           "f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
+           " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
+           " M[BQ]    MIME encode [B:base64 Q:quoted]\n"
+           " f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
            );
     fprintf(HELP_OUTPUT,
-           "Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
-           "         1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
-           "         4: JISX0208 Katakana to JISX0201 Katakana\n"
-           "X,x      Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
-           "B[0-2]   Broken input  0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
+           " Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
+           "          1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
+           "          4: JISX0208 Katakana to JISX0201 Katakana\n"
+           " X,x      Convert Halfwidth Katakana to Fullwidth or preserve it\n"
            );
     fprintf(HELP_OUTPUT,
-#ifdef MSDOS
-           "T        Text mode output\n"
-#endif
-           "O        Output to File (DEFAULT 'nkf.out')\n"
-           "I        Convert non ISO-2022-JP charactor to GETA\n"
-           "d,c      Convert line breaks  -d: LF  -c: CRLF\n"
-           "-L[uwm]  line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
-           "v, V     Show this usage. V: show configuration\n"
-           "\n");
+           " O        Output to File (DEFAULT 'nkf.out')\n"
+           " L[uwm]   Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
+           );
     fprintf(HELP_OUTPUT,
-           "Long name options\n"
-           " --ic=<input codeset>  --oc=<output codeset>\n"
-           "                   Specify the input or output codeset\n"
-           " --fj  --unix --mac  --windows\n"
-           " --jis  --euc  --sjis  --utf8  --utf16  --mime  --base64\n"
-           "                   Convert for the system or code\n"
-           " --hiragana  --katakana  --katakana-hiragana\n"
-           "                   To Hiragana/Katakana Conversion\n"
-           " --prefix=         Insert escape before troublesome characters of Shift_JIS\n"
+           " --ic=<encoding>        Specify the input encoding\n"
+           " --oc=<encoding>        Specify the output encoding\n"
+           " --hiragana --katakana  Hiragana/Katakana Conversion\n"
+           " --katakana-hiragana    Converts each other\n"
            );
     fprintf(HELP_OUTPUT,
 #ifdef INPUT_OPTION
-           " --cap-input, --url-input  Convert hex after ':' or '%%'\n"
+           " --{cap, url}-input     Convert hex after ':' or '%%'\n"
 #endif
 #ifdef NUMCHAR_OPTION
-           " --numchar-input   Convert Unicode Character Reference\n"
+           " --numchar-input        Convert Unicode Character Reference\n"
 #endif
 #ifdef UTF8_INPUT_ENABLE
            " --fb-{skip, html, xml, perl, java, subchar}\n"
-           "                   Specify how nkf handles unassigned characters\n"
+           "                        Specify unassigned character's replacement\n"
 #endif
            );
     fprintf(HELP_OUTPUT,
 #ifdef OVERWRITE
-           " --in-place[=SUFFIX]  --overwrite[=SUFFIX]\n"
-           "                   Overwrite original listed files by filtered result\n"
-           "                   --overwrite preserves timestamp of original files\n"
+           " --in-place[=SUF]       Overwrite original files\n"
+           " --overwrite[=SUF]      Preserve timestamp of original files\n"
 #endif
-           " -g  --guess       Guess the input code\n"
-           " --help  --version Show this help/the version\n"
-           "                   For more information, see also man nkf\n"
-           "\n");
+           " -g --guess             Guess the input code\n"
+           " -v --version           Print the version\n"
+           " --help/-V              Print this help / configuration\n"
+           );
     version();
 }
 
@@ -1055,7 +1058,7 @@ nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
     int shift = 20;
     c &= VALUE_MASK;
     while(shift >= 0){
-       if(c >= 1<<shift){
+       if(c >= NKF_INT32_C(1)<<shift){
            while(shift >= 0){
                (*f)(0, bin2hex(c>>shift));
                shift -= 4;
@@ -1108,18 +1111,26 @@ encode_fallback_java(nkf_char c)
     (*oconv)(0, '\\');
     c &= VALUE_MASK;
     if(!nkf_char_unicode_bmp_p(c)){
-       (*oconv)(0, 'U');
-       (*oconv)(0, '0');
-       (*oconv)(0, '0');
-       (*oconv)(0, bin2hex(c>>20));
-       (*oconv)(0, bin2hex(c>>16));
+        int high = (c >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
+        int low = (c & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
+       (*oconv)(0, 'u');
+       (*oconv)(0, bin2hex(high>>12));
+       (*oconv)(0, bin2hex(high>> 8));
+       (*oconv)(0, bin2hex(high>> 4));
+       (*oconv)(0, bin2hex(high    ));
+       (*oconv)(0, '\\');
+       (*oconv)(0, 'u');
+       (*oconv)(0, bin2hex(low>>12));
+       (*oconv)(0, bin2hex(low>> 8));
+       (*oconv)(0, bin2hex(low>> 4));
+       (*oconv)(0, bin2hex(low    ));
     }else{
        (*oconv)(0, 'u');
+       (*oconv)(0, bin2hex(c>>12));
+       (*oconv)(0, bin2hex(c>> 8));
+       (*oconv)(0, bin2hex(c>> 4));
+       (*oconv)(0, bin2hex(c    ));
     }
-    (*oconv)(0, bin2hex(c>>12));
-    (*oconv)(0, bin2hex(c>> 8));
-    (*oconv)(0, bin2hex(c>> 4));
-    (*oconv)(0, bin2hex(c    ));
     return;
 }
 
@@ -1153,7 +1164,7 @@ static const struct {
     {"euc","e"},
     {"euc-input","E"},
     {"fj","jm"},
-    {"help","v"},
+    {"help",""},
     {"jis","j"},
     {"jis-input","J"},
     {"mac","sLm"},
@@ -1163,7 +1174,7 @@ static const struct {
     {"sjis","s"},
     {"sjis-input","S"},
     {"unix","eLu"},
-    {"version","V"},
+    {"version","v"},
     {"windows","sLw"},
     {"hiragana","h1"},
     {"katakana","h2"},
@@ -1230,9 +1241,10 @@ set_input_encoding(nkf_encoding *enc)
     case ISO_8859_1:
        iso8859_f = TRUE;
        break;
-    case CP50220:
     case CP50221:
     case CP50222:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
+    case CP50220:
 #ifdef SHIFTJIS_CP932
        cp51932_f = TRUE;
 #endif
@@ -1254,6 +1266,7 @@ set_input_encoding(nkf_encoding *enc)
     case SHIFT_JIS:
        break;
     case WINDOWS_31J:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef SHIFTJIS_CP932
        cp51932_f = TRUE;
 #endif
@@ -1275,6 +1288,7 @@ set_input_encoding(nkf_encoding *enc)
     case EUCJP_NKF:
        break;
     case CP51932:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef SHIFTJIS_CP932
        cp51932_f = TRUE;
 #endif
@@ -1283,6 +1297,7 @@ set_input_encoding(nkf_encoding *enc)
 #endif
        break;
     case EUCJP_MS:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef SHIFTJIS_CP932
        cp51932_f = FALSE;
 #endif
@@ -1291,6 +1306,7 @@ set_input_encoding(nkf_encoding *enc)
 #endif
        break;
     case EUCJP_ASCII:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef SHIFTJIS_CP932
        cp51932_f = FALSE;
 #endif
@@ -1303,6 +1319,7 @@ set_input_encoding(nkf_encoding *enc)
        x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
        cp51932_f = FALSE;
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
        break;
     case EUC_JISX0213:
@@ -1345,7 +1362,6 @@ set_output_encoding(nkf_encoding *enc)
 {
     switch (nkf_enc_to_index(enc)) {
     case CP50220:
-       x0201_f = TRUE;
 #ifdef SHIFTJIS_CP932
        if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
@@ -1354,6 +1370,7 @@ set_output_encoding(nkf_encoding *enc)
 #endif
        break;
     case CP50221:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef SHIFTJIS_CP932
        if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
@@ -1361,6 +1378,11 @@ set_output_encoding(nkf_encoding *enc)
        ms_ucs_map_f = UCS_MAP_CP932;
 #endif
        break;
+    case ISO_2022_JP:
+#ifdef SHIFTJIS_CP932
+       if (cp932inv_f == TRUE) cp932inv_f = FALSE;
+#endif
+       break;
     case ISO_2022_JP_1:
        x0212_f = TRUE;
 #ifdef SHIFTJIS_CP932
@@ -1368,6 +1390,7 @@ set_output_encoding(nkf_encoding *enc)
 #endif
        break;
     case ISO_2022_JP_3:
+    case ISO_2022_JP_2004:
        x0212_f = TRUE;
        x0213_f = TRUE;
 #ifdef SHIFTJIS_CP932
@@ -1377,6 +1400,7 @@ set_output_encoding(nkf_encoding *enc)
     case SHIFT_JIS:
        break;
     case WINDOWS_31J:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef UTF8_OUTPUT_ENABLE
        ms_ucs_map_f = UCS_MAP_CP932;
 #endif
@@ -1405,6 +1429,7 @@ set_output_encoding(nkf_encoding *enc)
 #endif
        break;
     case CP51932:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
 #ifdef SHIFTJIS_CP932
        if (cp932inv_f == TRUE) cp932inv_f = FALSE;
 #endif
@@ -1413,12 +1438,14 @@ set_output_encoding(nkf_encoding *enc)
 #endif
        break;
     case EUCJP_MS:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
        x0212_f = TRUE;
 #ifdef UTF8_OUTPUT_ENABLE
        ms_ucs_map_f = UCS_MAP_MS;
 #endif
        break;
     case EUCJP_ASCII:
+       if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
        x0212_f = TRUE;
 #ifdef UTF8_OUTPUT_ENABLE
        ms_ucs_map_f = UCS_MAP_ASCII;
@@ -1455,6 +1482,7 @@ set_output_encoding(nkf_encoding *enc)
        output_endian = ENDIAN_LITTLE;
        output_bom_f = TRUE;
        break;
+    case UTF_32:
     case UTF_32BE_BOM:
        output_bom_f = TRUE;
        break;
@@ -1546,13 +1574,26 @@ x0212_unshift(nkf_char c)
 }
 #endif /* X0212_ENABLE */
 
+static int /* 1 for ku 1, 3-5, 8, 12-15 and 78-94 (ku = c1 - 0x20), otherwise 0 */
+is_x0213_2_in_x0212(nkf_char c1)
+{
+    static const char x0213_2_table[] =
+       {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
+    int ku = c1 - 0x20;
+    if (0 <= ku && ku <= 15) /* lower bound added: c1 < 0x20 would otherwise index before the table */
+       return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
+    if (78 <= ku && ku <= 94)
+       return 1;
+    return 0;
+}
+
 static nkf_char
 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
 {
     nkf_char ndx;
     if (is_eucg3(c2)){
        ndx = c2 & 0x7f;
-       if (x0213_f){
+       if (x0213_f && is_x0213_2_in_x0212(ndx)){
            if((0x21 <= ndx && ndx <= 0x2F)){
                if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
                if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
@@ -1598,7 +1639,7 @@ s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
     if (0xFC < c1) return 1;
 #ifdef SHIFTJIS_CP932
-    if (!cp932inv_f && is_ibmext_in_sjis(c2)){
+    if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
        val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
        if (val){
            c2 = val >> 8;
@@ -1681,7 +1722,7 @@ nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_
        *p3 = 0x80 | ( val        & 0x3f);
        *p4 = 0;
     } else if (nkf_char_unicode_value_p(val)) {
-       *p1 = 0xe0 |  (val >> 16);
+       *p1 = 0xf0 |  (val >> 18);
        *p2 = 0x80 | ((val >> 12) & 0x3f);
        *p3 = 0x80 | ((val >>  6) & 0x3f);
        *p4 = 0x80 | ( val        & 0x3f);
@@ -1701,7 +1742,7 @@ nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
        /* single byte */
        wc = c1;
     }
-    else if (c1 <= 0xC3) {
+    else if (c1 <= 0xC1) {
        /* trail byte or invalid */
        return -1;
     }
@@ -1841,6 +1882,7 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c
            ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
            ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
            ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
+           x0213_f ? utf8_to_euc_2bytes_x0213 :
            utf8_to_euc_2bytes;
        ret =  unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
     }else if(c0 < 0xF0){
@@ -1908,16 +1950,22 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c
            ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
            ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
            ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
+           x0213_f ? utf8_to_euc_3bytes_x0213 :
            utf8_to_euc_3bytes;
        ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
     }else return -1;
 #ifdef SHIFTJIS_CP932
-    if (!ret && !cp932inv_f && is_eucg3(*p2)) {
-       nkf_char s2, s1;
-       if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
-           s2e_conv(s2, s1, p2, p1);
-       }else{
-           ret = 1;
+    if (!ret&& is_eucg3(*p2)) {
+       if (cp932inv_f) {
+           if (encode_fallback) ret = 1;
+       }
+       else {
+           nkf_char s2, s1;
+           if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
+               s2e_conv(s2, s1, p2, p1);
+           }else{
+               ret = 1;
+           }
        }
     }
 #endif
@@ -1925,6 +1973,15 @@ unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_c
 }
 
 #ifdef UTF8_OUTPUT_ENABLE
+#define X0213_SURROGATE_FIND(tbl, size, euc) do { /* linear search of tbl[0..size) for a row whose column 0 equals euc */ \
+       int i; \
+       for (i = 0; i < size; i++) \
+           if (tbl[i][0] == euc) { \
+               low = tbl[i][2]; /* writes the caller's variable named low; it is left untouched when nothing matches */ \
+               break; \
+           } \
+    } while (0)
+
 static nkf_char
 e2w_conv(nkf_char c2, nkf_char c1)
 {
@@ -1947,7 +2004,9 @@ e2w_conv(nkf_char c2, nkf_char c1)
        }
        c2 = (c2&0x7f) - 0x21;
        if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
-           p = x0212_to_utf8_2bytes[c2];
+           p =
+               x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
+               x0212_to_utf8_2bytes[c2];
        else
            return 0;
 #endif
@@ -1956,6 +2015,7 @@ e2w_conv(nkf_char c2, nkf_char c1)
        c2 = (c2&0x7f) - 0x21;
        if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
            p =
+               x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
                ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
                ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
                euc_to_utf8_2bytes_ms[c2];
@@ -1964,8 +2024,39 @@ e2w_conv(nkf_char c2, nkf_char c1)
     }
     if (!p) return 0;
     c1 = (c1 & 0x7f) - 0x21;
-    if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
-       return p[c1];
+    if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
+       nkf_char val = p[c1];
+       if (x0213_f && 0xD800<=val && val<=0xDBFF) {
+           nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
+           nkf_char low = 0;
+           if (p==x0212_to_utf8_2bytes_x0213[c2]) {
+               X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
+           } else {
+               X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
+           }
+           if (!low) return 0;
+           return UTF16_TO_UTF32(val, low);
+       } else {
+           return val;
+       }
+    }
+    return 0;
+}
+
+static nkf_char /* returns column 1 of the combining-table row keyed by (c2,c1), or 0 */
+e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
+{
+    nkf_char euc;
+    int i;
+    for (i = 0; i < sizeof_x0213_combining_chars; i++) /* is comb listed in x0213_combining_chars? */
+       if (x0213_combining_chars[i] == comb)
+           break;
+    if (i >= sizeof_x0213_combining_chars)
+       return 0; /* comb is not in the list */
+    euc = (c2&0x7f)<<8 | (c1&0x7f); /* 7-bit two-byte key compared against column 0 */
+    for (i = 0; i < sizeof_x0213_combining_table; i++)
+       if (x0213_combining_table[i][0] == euc)
+           return x0213_combining_table[i][1]; /* NOTE(review): column 2 is not compared with comb here - confirm this is intended */
     return 0;
 }
 #endif
@@ -2012,6 +2103,25 @@ w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
        }
     }
     else {
+       int i;
+       if (x0213_f) {
+           c1 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
+           c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
+           for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
+               if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
+                   val = x0213_1_surrogate_table[i][0];
+                   *p2 = val >> 8;
+                   *p1 = val & 0xFF;
+                   return 0;
+               }
+           for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
+               if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
+                   val = x0213_2_surrogate_table[i][0];
+                   *p2 = PREFIX_EUCG3 | (val >> 8);
+                   *p1 = val & 0xFF;
+                   return 0;
+               }
+       }
        *p2 = 0;
        *p1 = nkf_char_unicode_new(val);
     }
@@ -2084,7 +2194,7 @@ e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
 }
 
 static nkf_char
-s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
+s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
 {
     if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
        if (iso2022jp_f && !x0201_f) {
@@ -2107,6 +2217,30 @@ s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
     return 0;
 }
 
+static int /* TRUE when wc appears in column 1 of x0213_combining_table, else FALSE */
+x0213_wait_combining_p(nkf_char wc)
+{
+    int i;
+    for (i = 0; i < sizeof_x0213_combining_table; i++) { /* linear scan of every row */
+       if (x0213_combining_table[i][1] == wc) {
+           return TRUE;
+       }
+    }
+    return FALSE;
+}
+
+static int /* TRUE when wc is one of the entries of x0213_combining_chars, else FALSE */
+x0213_combining_p(nkf_char wc)
+{
+    int i;
+    for (i = 0; i < sizeof_x0213_combining_chars; i++) { /* linear scan of the whole list */
+       if (x0213_combining_chars[i] == wc) {
+           return TRUE;
+       }
+    }
+    return FALSE;
+}
+
 static nkf_char
 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
 {
@@ -2174,6 +2308,8 @@ w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
        c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
        c1 = 0;
     } else {
+       if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
+           return -3;
        ret = w2e_conv(c1, c2, c3, &c1, &c2);
     }
     if (ret == 0){
@@ -2182,9 +2318,22 @@ w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
     return ret;
 }
 
+static nkf_char /* converts (c1,c2,c3) with w2e_conv and emits the result; returns w2e_conv's status */
+w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
+{
+    /* continue from the line below 'return -3;' in w_iconv() */
+    nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2); /* c1 and c2 are overwritten with the converted pair */
+    if (ret == 0){
+       (*oconv)(c1, c2); /* output only when the conversion succeeded */
+    }
+    return ret;
+}
+
 #define NKF_ICONV_INVALID_CODE_RANGE -13
+#define NKF_ICONV_WAIT_COMBINING_CHAR -14
+#define NKF_ICONV_NOT_COMBINED -15
 static size_t
-unicode_iconv(nkf_char wc)
+unicode_iconv(nkf_char wc, int nocombine)
 {
     nkf_char c1, c2;
     int ret = 0;
@@ -2196,6 +2345,8 @@ unicode_iconv(nkf_char wc)
        /* unpaired surrogate */
        return NKF_ICONV_INVALID_CODE_RANGE;
     }else if (wc < 0xFFFF) {
+       if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
+           return NKF_ICONV_WAIT_COMBINING_CHAR;
        ret = w16e_conv(wc, &c2, &c1);
        if (ret) return ret;
     }else if (wc < 0x10FFFF) {
@@ -2208,9 +2359,50 @@ unicode_iconv(nkf_char wc)
     return 0;
 }
 
-#define NKF_ICONV_NEED_ONE_MORE_BYTE -1
-#define NKF_ICONV_NEED_TWO_MORE_BYTES -2
-#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
+static nkf_char
+unicode_iconv_combine(nkf_char wc, nkf_char wc2) /* try to emit the pair (wc, wc2) as one x0213_combining_table entry; returns 0 when emitted */
+{
+    nkf_char c1, c2;
+    int i;
+
+    if (wc2 < 0x80) { /* a second code point below 0x80 is never combined */
+       return NKF_ICONV_NOT_COMBINED;
+    }else if ((wc2>>11) == 27) { /* 27 << 11 == 0xD800, so this matches 0xD800..0xDFFF */
+       /* unpaired surrogate */
+       return NKF_ICONV_INVALID_CODE_RANGE;
+    }else if (wc2 < 0xFFFF) { /* note: 0xFFFF itself falls through to the next branch */
+       if (!x0213_combining_p(wc2)) /* wc2 must be one of the listed combining characters */
+           return NKF_ICONV_NOT_COMBINED;
+       for (i = 0; i < sizeof_x0213_combining_table; i++) {
+           if (x0213_combining_table[i][1] == wc &&
+               x0213_combining_table[i][2] == wc2) { /* row matches both code points */
+               c2 = x0213_combining_table[i][0] >> 8; /* upper part of element [0] */
+               c1 = x0213_combining_table[i][0] & 0x7f; /* low 7 bits of element [0] */
+               (*oconv)(c2, c1); /* the combined character goes straight to the output converter */
+               return 0;
+           }
+       }
+    }else if (wc2 < 0x10FFFF) { /* code points of 0xFFFF and above (below 0x10FFFF) are never combined */
+       return NKF_ICONV_NOT_COMBINED;
+    } else {
+       return NKF_ICONV_INVALID_CODE_RANGE;
+    }
+    return NKF_ICONV_NOT_COMBINED; /* reached when wc2 is a combining character but no row matched */
+}
+
+static nkf_char
+w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6) /* decode two UTF-8 sequences of up to three bytes each and try to combine them */
+{
+    nkf_char wc, wc2;
+    wc = nkf_utf8_to_unicode(c1, c2, c3, 0); /* first sequence; the fourth byte is always passed as 0 */
+    wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0); /* second sequence, decoded the same way */
+    if (wc2 < 0) /* only wc2 is checked for a negative result; wc is passed on as is */
+       return wc2;
+    return unicode_iconv_combine(wc, wc2);
+}
+
+#define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1 /* NOTE(review): replacement list is an unparenthesized cast; ((size_t)-1) would be safer inside larger expressions */
+#define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2 /* NOTE(review): same remark as above; value is -2 converted to size_t */
 static size_t
 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
 {
@@ -2239,31 +2431,63 @@ nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
        }
     }
 
-    return (*unicode_iconv)(wc);
+    return (*unicode_iconv)(wc, FALSE);
+}
+
+static size_t
+nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) /* build two 16-bit units from four bytes and try to combine them */
+{
+    nkf_char wc, wc2;
+
+    if (input_endian == ENDIAN_BIG) {
+       if (0xD8 <= c3 && c3 <= 0xDB) { /* c3 is the upper byte of the second unit here */
+           return NKF_ICONV_NOT_COMBINED;
+       } else {
+           wc = c1 << 8 | c2;
+           wc2 = c3 << 8 | c4;
+       }
+    } else {
+       if (0xD8 <= c2 && c2 <= 0xDB) { /* NOTE(review): c2 is the upper byte of the FIRST unit in this branch, while the big-endian branch tests the second unit (c4 would be the counterpart) - confirm intent */
+           return NKF_ICONV_NOT_COMBINED;
+       } else {
+           wc = c2 << 8 | c1;
+           wc2 = c4 << 8 | c3;
+       }
+    }
+
+    return unicode_iconv_combine(wc, wc2); /* the nkf_char result is converted to size_t on return */
+}
+
+static size_t
+nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2) /* convert a single 16-bit unit, bypassing the wait-for-combining check */
+{
+    nkf_char wc;
+    if (input_endian == ENDIAN_BIG)
+       wc = c1 << 8 | c2; /* first byte is the upper byte */
+    else
+       wc = c2 << 8 | c1; /* second byte is the upper byte */
+    return (*unicode_iconv)(wc, TRUE); /* TRUE is the nocombine argument of unicode_iconv() */
 }
 
 static nkf_char
-w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
+w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) /* forwards (c2, c1) to oconv unchanged; c0 is not used */
 {
-    return 0;
+    (*oconv)(c2, c1);
+    return 16; /* different from w_iconv32 */
 }
 
 static nkf_char
-w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
+w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) /* forwards (c2, c1) to oconv unchanged; c0 is not used */
 {
-    return 0;
+    (*oconv)(c2, c1);
+    return 32; /* different from w_iconv16 */
 }
 
-static size_t
-nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
+static nkf_char
+utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
 {
     nkf_char wc;
 
-    if (c1 == EOF) {
-       (*oconv)(EOF, 0);
-       return 0;
-    }
-
     switch(input_endian){
     case ENDIAN_BIG:
        wc = c2 << 16 | c3 << 8 | c4;
@@ -2280,8 +2504,48 @@ nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
     default:
        return NKF_ICONV_INVALID_CODE_RANGE;
     }
+    return wc;
+}
+
+static size_t
+nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) /* convert one 4-byte UTF-32 unit, allowing the wait-for-combining check */
+{
+    nkf_char wc;
+
+    if (c1 == EOF) { /* end of input: pass EOF on to the output converter and report success */
+       (*oconv)(EOF, 0);
+       return 0;
+    }
+
+    wc = utf32_to_nkf_char(c1, c2, c3, c4);
+    if (wc < 0) /* a negative result is returned as is (converted to size_t) */
+       return wc;
+
+    return (*unicode_iconv)(wc, FALSE); /* FALSE is the nocombine argument of unicode_iconv() */
+}
+
+static nkf_char
+nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8) /* decode two 4-byte units and try to combine them */
+{
+    nkf_char wc, wc2;
+
+    wc = utf32_to_nkf_char(c1, c2, c3, c4); /* first unit */
+    if (wc < 0) /* a negative result from either unit is returned unchanged */
+       return wc;
+    wc2 = utf32_to_nkf_char(c5, c6, c7, c8); /* second unit */
+    if (wc2 < 0)
+       return wc2;
 
-    return (*unicode_iconv)(wc);
+    return unicode_iconv_combine(wc, wc2);
+}
+
+static size_t
+nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) /* convert one 4-byte unit, bypassing the wait-for-combining check */
+{
+    nkf_char wc;
+
+    wc = utf32_to_nkf_char(c1, c2, c3, c4); /* unlike nkf_iconv_utf_32(), a negative wc is not filtered out here */
+    return (*unicode_iconv)(wc, TRUE); /* TRUE is the nocombine argument of unicode_iconv() */
 }
 #endif
 
@@ -2536,11 +2800,19 @@ s_oconv(nkf_char c2, nkf_char c1)
 }
 
 #ifdef UTF8_OUTPUT_ENABLE
+#define OUTPUT_UTF8(val) do { /* write val through o_putc as UTF-8; needs c1, c2, c3, c4 in the caller's scope and overwrites them */ \
+       nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
+       (*o_putc)(c1); /* the first byte is always written */ \
+       if (c2) (*o_putc)(c2); /* the remaining bytes are written only when non-zero */ \
+       if (c3) (*o_putc)(c3); \
+       if (c4) (*o_putc)(c4); \
+    } while (0)
+
 static void
 w_oconv(nkf_char c2, nkf_char c1)
 {
     nkf_char c3, c4;
-    nkf_char val;
+    nkf_char val, val2;
 
     if (output_bom_f) {
        output_bom_f = FALSE;
@@ -2556,11 +2828,7 @@ w_oconv(nkf_char c2, nkf_char c1)
 
     if (c2 == 0 && nkf_char_unicode_p(c1)){
        val = c1 & VALUE_MASK;
-       nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
-       (*o_putc)(c1);
-       if (c2) (*o_putc)(c2);
-       if (c3) (*o_putc)(c3);
-       if (c4) (*o_putc)(c4);
+       OUTPUT_UTF8(val);
        return;
     }
 
@@ -2569,27 +2837,46 @@ w_oconv(nkf_char c2, nkf_char c1)
     } else {
        val = e2w_conv(c2, c1);
        if (val){
-           nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
-           (*o_putc)(c1);
-           if (c2) (*o_putc)(c2);
-           if (c3) (*o_putc)(c3);
-           if (c4) (*o_putc)(c4);
+           val2 = e2w_combining(val, c2, c1);
+           if (val2)
+               OUTPUT_UTF8(val2);
+           OUTPUT_UTF8(val);
        }
     }
 }
 
+#define OUTPUT_UTF16_BYTES(c1, c2) do { /* write two bytes through o_putc in output_endian order */ \
+       if (output_endian == ENDIAN_LITTLE){ /* little endian: first argument, then second */ \
+           (*o_putc)(c1); \
+           (*o_putc)(c2); \
+       }else{ /* otherwise: second argument, then first */ \
+           (*o_putc)(c2); \
+           (*o_putc)(c1); \
+       } \
+    } while (0)
+
+#define OUTPUT_UTF16(val) do { /* write val as UTF-16; needs c1 and c2 in the caller's scope, and val must be a modifiable lvalue */ \
+       if (nkf_char_unicode_bmp_p(val)) { /* single 16-bit unit */ \
+           c2 = (val >> 8) & 0xff; \
+           c1 = val & 0xff; \
+           OUTPUT_UTF16_BYTES(c1, c2); /* arguments are passed as (low byte, high byte) */ \
+       } else { \
+           val &= VALUE_MASK; /* modifies the macro argument in place */ \
+           if (val <= UNICODE_MAX) { /* values above UNICODE_MAX produce no output */ \
+               c2 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */ \
+               c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
+               OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); /* 0xD7C0 == 0xD800 - (0x10000 >> 10), so the 0x10000 offset is already folded in */ \
+               OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
+           } \
+       } \
+    } while (0)
+
 static void
 w_oconv16(nkf_char c2, nkf_char c1)
 {
     if (output_bom_f) {
        output_bom_f = FALSE;
-       if (output_endian == ENDIAN_LITTLE){
-           (*o_putc)(0xFF);
-           (*o_putc)(0xFE);
-       }else{
-           (*o_putc)(0xFE);
-           (*o_putc)(0xFF);
-       }
+       OUTPUT_UTF16_BYTES(0xFF, 0xFE);
     }
 
     if (c2 == EOF) {
@@ -2598,43 +2885,33 @@ w_oconv16(nkf_char c2, nkf_char c1)
     }
 
     if (c2 == 0 && nkf_char_unicode_p(c1)) {
-       if (nkf_char_unicode_bmp_p(c1)) {
-           c2 = (c1 >> 8) & 0xff;
-           c1 &= 0xff;
-       } else {
-           c1 &= VALUE_MASK;
-           if (c1 <= UNICODE_MAX) {
-               c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
-               c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
-               if (output_endian == ENDIAN_LITTLE){
-                   (*o_putc)(c2 & 0xff);
-                   (*o_putc)((c2 >> 8) & 0xff);
-                   (*o_putc)(c1 & 0xff);
-                   (*o_putc)((c1 >> 8) & 0xff);
-               }else{
-                   (*o_putc)((c2 >> 8) & 0xff);
-                   (*o_putc)(c2 & 0xff);
-                   (*o_putc)((c1 >> 8) & 0xff);
-                   (*o_putc)(c1 & 0xff);
-               }
-           }
-           return;
-       }
+       OUTPUT_UTF16(c1);
     } else if (c2) {
-       nkf_char val = e2w_conv(c2, c1);
-       c2 = (val >> 8) & 0xff;
-       c1 = val & 0xff;
+       nkf_char val, val2;
+       val = e2w_conv(c2, c1);
        if (!val) return;
-    }
-
-    if (output_endian == ENDIAN_LITTLE){
-       (*o_putc)(c1);
-       (*o_putc)(c2);
-    }else{
-       (*o_putc)(c2);
-       (*o_putc)(c1);
-    }
-}
+       val2 = e2w_combining(val, c2, c1);
+       if (val2)
+           OUTPUT_UTF16(val2);
+       OUTPUT_UTF16(val);
+    } else {
+       OUTPUT_UTF16_BYTES(c1, c2);
+    }
+}
+
+#define OUTPUT_UTF32(c) do { /* write c through o_putc as four bytes; the most significant byte is always 0 */ \
+       if (output_endian == ENDIAN_LITTLE){ /* little endian: lowest byte first */ \
+           (*o_putc)( (c)        & 0xFF); \
+           (*o_putc)(((c) >>  8) & 0xFF); \
+           (*o_putc)(((c) >> 16) & 0xFF); \
+           (*o_putc)(0); \
+       }else{ /* otherwise: zero byte first, lowest byte last */ \
+           (*o_putc)(0); \
+           (*o_putc)(((c) >> 16) & 0xFF); \
+           (*o_putc)(((c) >>  8) & 0xFF); \
+           (*o_putc)( (c)        & 0xFF); \
+       } \
+    } while (0)
 
 static void
 w_oconv32(nkf_char c2, nkf_char c1)
@@ -2664,31 +2941,27 @@ w_oconv32(nkf_char c2, nkf_char c1)
     } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
        c1 &= VALUE_MASK;
     } else if (c2) {
-       c1 = e2w_conv(c2, c1);
-       if (!c1) return;
-    }
-    if (output_endian == ENDIAN_LITTLE){
-       (*o_putc)( c1        & 0xFF);
-       (*o_putc)((c1 >>  8) & 0xFF);
-       (*o_putc)((c1 >> 16) & 0xFF);
-       (*o_putc)(0);
-    }else{
-       (*o_putc)(0);
-       (*o_putc)((c1 >> 16) & 0xFF);
-       (*o_putc)((c1 >>  8) & 0xFF);
-       (*o_putc)( c1        & 0xFF);
+       nkf_char val, val2;
+       val = e2w_conv(c2, c1);
+       if (!val) return;
+       val2 = e2w_combining(val, c2, c1);
+       if (val2)
+           OUTPUT_UTF32(val2);
+       c1 = val;
     }
+    OUTPUT_UTF32(c1);
 }
 #endif
 
-#define SCORE_L2       (1)                   /* \e$BBh\e(B2\e$B?e=`4A;z\e(B */
-#define SCORE_KANA     (SCORE_L2 << 1)       /* \e$B$$$o$f$kH>3Q%+%J\e(B */
-#define SCORE_DEPEND   (SCORE_KANA << 1)     /* \e$B5!<o0MB8J8;z\e(B */
-#define SCORE_CP932    (SCORE_DEPEND << 1)   /* CP932 \e$B$K$h$kFI$_49$(\e(B (IBM extended characters) */
+#define SCORE_L2       (1)                   /* Kanji Level 2; each following flag is the previous one shifted left by one bit */
+#define SCORE_KANA     (SCORE_L2 << 1)       /* Halfwidth Katakana */
+#define SCORE_DEPEND   (SCORE_KANA << 1)     /* Machine-dependent characters */
+#define SCORE_CP932    (SCORE_DEPEND << 1)   /* IBM extended characters */
 #define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
-#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* \e$BB8:_$7$J$$J8;z\e(B */
-#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* MIME \e$B$K$h$k;XDj\e(B */
-#define SCORE_ERROR    (SCORE_iMIME << 1) /* \e$B%(%i!<\e(B */
+#define SCORE_X0213    (SCORE_X0212 << 1)    /* JIS X 0213 */
+#define SCORE_NO_EXIST (SCORE_X0213 << 1)    /* Undefined Characters */
+#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* MIME selected */
+#define SCORE_ERROR    (SCORE_iMIME << 1) /* Error */
 
 #define SCORE_INIT (SCORE_iMIME)
 
@@ -2696,14 +2969,35 @@ static const nkf_char score_table_A0[] = {
     0, 0, 0, 0,
     0, 0, 0, 0,
     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
-    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
+    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_X0213,
 };
 
 static const nkf_char score_table_F0[] = {
     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
-    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
+    SCORE_L2, SCORE_DEPEND, SCORE_X0213, SCORE_X0213,
     SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
-    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
+    SCORE_CP932, SCORE_X0213, SCORE_X0213, SCORE_ERROR,
+};
+
+static const nkf_char score_table_8FA0[] = { /* used by code_score() when c2 == 0x8f and (c1 & 0x70) == 0x20; indexed by c1 & 0x0f */
+    0, SCORE_X0213, SCORE_X0212, SCORE_X0213,
+    SCORE_X0213, SCORE_X0213, SCORE_X0212, SCORE_X0212,
+    SCORE_X0213, SCORE_X0212, SCORE_X0212, SCORE_X0212,
+    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
+};
+
+static const nkf_char score_table_8FE0[] = { /* used by code_score() when c2 == 0x8f and (c1 & 0x70) == 0x60; indexed by c1 & 0x0f */
+    SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
+    SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
+    SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
+    SCORE_X0212, SCORE_X0212, SCORE_X0213, SCORE_X0213,
+};
+
+static const nkf_char score_table_8FF0[] = { /* used by code_score() when c2 == 0x8f and (c1 & 0x70) == 0x70; indexed by c1 & 0x0f */
+    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0212,
+    SCORE_X0212, SCORE_X0213, SCORE_X0213, SCORE_X0213,
+    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
+    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
 };
 
 static void
@@ -2726,15 +3020,21 @@ static void
 code_score(struct input_code *ptr)
 {
     nkf_char c2 = ptr->buf[0];
-#ifdef UTF8_OUTPUT_ENABLE
     nkf_char c1 = ptr->buf[1];
-#endif
     if (c2 < 0){
        set_code_score(ptr, SCORE_ERROR);
     }else if (c2 == SS2){
        set_code_score(ptr, SCORE_KANA);
     }else if (c2 == 0x8f){
-       set_code_score(ptr, SCORE_X0212);
+       if ((c1 & 0x70) == 0x20){
+           set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
+       }else if ((c1 & 0x70) == 0x60){
+           set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
+       }else if ((c1 & 0x70) == 0x70){
+           set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
+       }else{
+           set_code_score(ptr, SCORE_X0212);
+       }
 #ifdef UTF8_OUTPUT_ENABLE
     }else if (!e2w_conv(c2, c1)){
        set_code_score(ptr, SCORE_NO_EXIST);
@@ -3008,24 +3308,51 @@ code_status(nkf_char c)
     }
 }
 
+typedef struct { /* state that nkf_state_init() allocates once and resets on later calls */
+    nkf_buf_t *std_gc_buf; /* pushback buffer used by std_getc() / std_ungetc() */
+    nkf_char broken_state; /* last character recorded by broken_getc(); compared against ESC there */
+    nkf_buf_t *broken_buf; /* characters queued by broken_getc() / broken_ungetc() */
+    nkf_char mimeout_state; /* reset to 0 by nkf_state_init(); its users are outside this view */
+    nkf_buf_t *nfc_buf; /* work buffer used by nfc_getc() */
+} nkf_state_t;
+
+static nkf_state_t *nkf_state = NULL; /* NULL until the first nkf_state_init() call */
+
+#define STD_GC_BUFSIZE (256) /* capacity passed to nkf_buf_new() for std_gc_buf */
+
+static void
+nkf_state_init(void) /* allocate the shared state on first use, otherwise empty its buffers; always reset both state fields */
+{
+    if (nkf_state) { /* already allocated: reuse the existing buffers */
+       nkf_buf_clear(nkf_state->std_gc_buf);
+       nkf_buf_clear(nkf_state->broken_buf);
+       nkf_buf_clear(nkf_state->nfc_buf);
+    }
+    else {
+       nkf_state = nkf_xmalloc(sizeof(nkf_state_t)); /* result is used unchecked; presumably nkf_xmalloc() does not return NULL - verify */
+       nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
+       nkf_state->broken_buf = nkf_buf_new(3);
+       nkf_state->nfc_buf = nkf_buf_new(9);
+    }
+    nkf_state->broken_state = 0; /* done on both paths, since nkf_xmalloc() memory is not zeroed here */
+    nkf_state->mimeout_state = 0;
+}
+
 #ifndef WIN32DLL
 static nkf_char
 std_getc(FILE *f) /* return a pushed-back character if one is queued, otherwise read from f */
 {
-    if (std_gc_ndx){
-       return std_gc_buf[--std_gc_ndx];
+    if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){ /* characters queued by std_ungetc() take priority over the stream */
+       return nkf_buf_pop(nkf_state->std_gc_buf);
     }
     return getc(f);
 }
 #endif /*WIN32DLL*/
 
 static nkf_char
-std_ungetc(nkf_char c, FILE *f)
+std_ungetc(nkf_char c, ARG_UNUSED FILE *f) /* queue c for the next std_getc(); f is not used; always returns c */
 {
-    if (std_gc_ndx == STD_GC_BUFSIZE){
-       return EOF;
-    }
-    std_gc_buf[std_gc_ndx++] = c;
+    nkf_buf_push(nkf_state->std_gc_buf, c); /* NOTE(review): the explicit full-buffer check was dropped; presumably nkf_buf_push() handles overflow - verify */
     return c;
 }
 
@@ -3038,23 +3365,24 @@ std_putc(nkf_char c)
 }
 #endif /*WIN32DLL*/
 
-static unsigned char   hold_buf[HOLD_SIZE*2];
+static nkf_char   hold_buf[HOLD_SIZE*2]; /* elements are nkf_char; h_conv() tests entries with nkf_char_unicode_p() */
 static int             hold_count = 0; /* number of entries currently stored in hold_buf */
 static nkf_char
 push_hold_buf(nkf_char c2) /* append c2 to hold_buf; returns EOF when the buffer was or has become full, else the new count */
 {
     if (hold_count >= HOLD_SIZE*2) /* already full: nothing is stored */
        return (EOF);
-    hold_buf[hold_count++] = (unsigned char)c2;
+    hold_buf[hold_count++] = c2;
     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count); /* EOF is also returned when this push filled the last slot */
 }
 
 static int
-h_conv(FILE *f, int c1, int c2)
+h_conv(FILE *f, nkf_char c1, nkf_char c2)
 {
-    int ret, c4, c3;
+    int ret;
     int hold_index;
-
+    int fromhold_count;
+    nkf_char c3, c4;
 
     /** it must NOT be in the kanji shifte sequence      */
     /** it must NOT be written in JIS7                   */
@@ -3104,15 +3432,21 @@ h_conv(FILE *f, int c1, int c2)
     hold_index = 0;
     while (hold_index < hold_count){
        c1 = hold_buf[hold_index++];
-       if (c1 <= DEL){
+       if (nkf_char_unicode_p(c1)) {
+           (*oconv)(0, c1);
+           continue;
+       }
+       else if (c1 <= DEL){
            (*iconv)(0, c1, 0);
            continue;
        }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
            (*iconv)(JIS_X_0201_1976_K, c1, 0);
            continue;
        }
+       fromhold_count = 1;
        if (hold_index < hold_count){
            c2 = hold_buf[hold_index++];
+           fromhold_count++;
        }else{
            c2 = (*i_getc)(f);
            if (c2 == EOF){
@@ -3130,16 +3464,47 @@ h_conv(FILE *f, int c1, int c2)
            } else if ((c3 = (*i_getc)(f)) == EOF) {
                ret = EOF;
                break;
-           } else {
-               code_status(c3);
-               if (hold_index < hold_count){
-                   c4 = hold_buf[hold_index++];
-               } else if ((c4 = (*i_getc)(f)) == EOF) {
-                   c3 = ret = EOF;
-                   break;
+           }
+           code_status(c3);
+           if (hold_index < hold_count){
+               c4 = hold_buf[hold_index++];
+           } else if ((c4 = (*i_getc)(f)) == EOF) {
+               c3 = ret = EOF;
+               break;
+           }
+           code_status(c4);
+           (*iconv)(c1, c2, (c3<<8)|c4);
+           break;
+       case -3:
+           /* 4 bytes UTF-8 (check combining character) */
+           if (hold_index < hold_count){
+               c3 = hold_buf[hold_index++];
+               fromhold_count++;
+           } else if ((c3 = (*i_getc)(f)) == EOF) {
+               w_iconv_nocombine(c1, c2, 0);
+               break;
+           }
+           if (hold_index < hold_count){
+               c4 = hold_buf[hold_index++];
+               fromhold_count++;
+           } else if ((c4 = (*i_getc)(f)) == EOF) {
+               w_iconv_nocombine(c1, c2, 0);
+               if (fromhold_count <= 2)
+                   (*i_ungetc)(c3,f);
+               else
+                   hold_index--;
+               continue;
+           }
+           if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
+               w_iconv_nocombine(c1, c2, 0);
+               if (fromhold_count <= 2) {
+                   (*i_ungetc)(c4,f);
+                   (*i_ungetc)(c3,f);
+               } else if (fromhold_count == 3) {
+                   (*i_ungetc)(c4,f);
+                   hold_index--;
                } else {
-                   code_status(c4);
-                   (*iconv)(c1, c2, (c3<<8)|c4);
+                   hold_index -= 2;
                }
            }
            break;
@@ -3147,13 +3512,68 @@ h_conv(FILE *f, int c1, int c2)
            /* 3 bytes EUC or UTF-8 */
            if (hold_index < hold_count){
                c3 = hold_buf[hold_index++];
+               fromhold_count++;
            } else if ((c3 = (*i_getc)(f)) == EOF) {
                ret = EOF;
                break;
            } else {
                code_status(c3);
            }
-           (*iconv)(c1, c2, c3);
+           if ((*iconv)(c1, c2, c3) == -3) {
+               /* 6 bytes UTF-8 (check combining character) */
+               nkf_char c5, c6;
+               if (hold_index < hold_count){
+                   c4 = hold_buf[hold_index++];
+                   fromhold_count++;
+               } else if ((c4 = (*i_getc)(f)) == EOF) {
+                   w_iconv_nocombine(c1, c2, c3);
+                   continue;
+               }
+               if (hold_index < hold_count){
+                   c5 = hold_buf[hold_index++];
+                   fromhold_count++;
+               } else if ((c5 = (*i_getc)(f)) == EOF) {
+                   w_iconv_nocombine(c1, c2, c3);
+                   if (fromhold_count == 4)
+                       hold_index--;
+                   else
+                       (*i_ungetc)(c4,f);
+                   continue;
+               }
+               if (hold_index < hold_count){
+                   c6 = hold_buf[hold_index++];
+                   fromhold_count++;
+               } else if ((c6 = (*i_getc)(f)) == EOF) {
+                   w_iconv_nocombine(c1, c2, c3);
+                   if (fromhold_count == 5) {
+                       hold_index -= 2;
+                   } else if (fromhold_count == 4) {
+                       hold_index--;
+                       (*i_ungetc)(c5,f);
+                   } else {
+                       (*i_ungetc)(c5,f);
+                       (*i_ungetc)(c4,f);
+                   }
+                   continue;
+               }
+               if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
+                   w_iconv_nocombine(c1, c2, c3);
+                   if (fromhold_count == 6) {
+                       hold_index -= 3;
+                   } else if (fromhold_count == 5) {
+                       hold_index -= 2;
+                       (*i_ungetc)(c6,f);
+                   } else if (fromhold_count == 4) {
+                       hold_index--;
+                       (*i_ungetc)(c6,f);
+                       (*i_ungetc)(c5,f);
+                   } else {
+                       (*i_ungetc)(c6,f);
+                       (*i_ungetc)(c5,f);
+                       (*i_ungetc)(c4,f);
+                   }
+               }
+           }
            break;
        }
        if (c3 == EOF) break;
@@ -3168,6 +3588,7 @@ static void
 check_bom(FILE *f)
 {
     int c2;
+    input_bom_f = FALSE;
     switch(c2 = (*i_getc)(f)){
     case 0x00:
        if((c2 = (*i_getc)(f)) == 0x00){
@@ -3177,6 +3598,7 @@ check_bom(FILE *f)
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
+                       input_bom_f = TRUE;
                        input_endian = ENDIAN_BIG;
                        return;
                    }
@@ -3207,6 +3629,7 @@ check_bom(FILE *f)
                    set_iconv(TRUE, w_iconv);
                }
                if (iconv == w_iconv) {
+                   input_bom_f = TRUE;
                    return;
                }
                (*i_ungetc)(0xBF,f);
@@ -3235,6 +3658,7 @@ check_bom(FILE *f)
            }
            if (iconv == w_iconv16) {
                input_endian = ENDIAN_BIG;
+               input_bom_f = TRUE;
                return;
            }
            (*i_ungetc)(0xFF,f);
@@ -3250,6 +3674,7 @@ check_bom(FILE *f)
                    }
                    if (iconv == w_iconv32) {
                        input_endian = ENDIAN_LITTLE;
+                       input_bom_f = TRUE;
                        return;
                    }
                    (*i_ungetc)(0x00,f);
@@ -3261,6 +3686,7 @@ check_bom(FILE *f)
            }
            if (iconv == w_iconv16) {
                input_endian = ENDIAN_LITTLE;
+               input_bom_f = TRUE;
                return;
            }
            (*i_ungetc)(0xFE,f);
@@ -3273,74 +3699,50 @@ check_bom(FILE *f)
     }
 }
 
-static struct {
-    int count;
-    nkf_char status;
-    nkf_char buf[3];
-} broken_state;
-
-static void
-init_broken_state(void)
-{
-    memset(&broken_state, 0, sizeof(broken_state));
-}
-
-static void
-push_broken_buf(c)
-{
-    broken_state.buf[broken_state.count++] = c;
-}
-
-static nkf_char
-pop_broken_buf(void)
-{
-    return broken_state.buf[--broken_state.count];
-}
-
 static nkf_char
 broken_getc(FILE *f) /* read one character via i_bgetc, returning ESC in front of "$@"/"$B" and "(J"/"(B" pairs that were not preceded by ESC */
 {
     nkf_char c, c1;
 
-    if (broken_state.count > 0) {
-       return pop_broken_buf();
+    if (!nkf_buf_empty_p(nkf_state->broken_buf)) { /* drain queued characters before reading new input */
+       return nkf_buf_pop(nkf_state->broken_buf);
     }
     c = (*i_bgetc)(f);
-    if (c=='$' && broken_state.status != ESC
+    if (c=='$' && nkf_state->broken_state != ESC
        && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) { /* '$' not preceded by ESC while in ASCII or JIS X 0201 mode */
        c1= (*i_bgetc)(f); /* look at the following character */
-       broken_state.status = 0;
+       nkf_state->broken_state = 0;
        if (c1=='@'|| c1=='B') {
-           push_broken_buf(c1);
-           push_broken_buf(c);
+           nkf_buf_push(nkf_state->broken_buf, c1);
+           nkf_buf_push(nkf_state->broken_buf, c); /* c is pushed last; presumably nkf_buf_pop() is LIFO so c is read back before c1 - verify */
            return ESC;
        } else {
            (*i_bungetc)(c1,f); /* not a recognized pair: put the lookahead back and return '$' as is */
            return c;
        }
-    } else if (c=='(' && broken_state.status != ESC
+    } else if (c=='(' && nkf_state->broken_state != ESC
               && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) { /* '(' not preceded by ESC while in JIS X 0208 or JIS X 0201 mode */
        c1= (*i_bgetc)(f);
-       broken_state.status = 0;
+       nkf_state->broken_state = 0;
        if (c1=='J'|| c1=='B') {
-           push_broken_buf(c1);
-           push_broken_buf(c);
+           nkf_buf_push(nkf_state->broken_buf, c1);
+           nkf_buf_push(nkf_state->broken_buf, c); /* same queueing order as in the '$' branch */
            return ESC;
        } else {
            (*i_bungetc)(c1,f);
            return c;
        }
     } else {
-       broken_state.status = c;
+       nkf_state->broken_state = c; /* remember this character so the next call can tell whether it was ESC */
        return c;
     }
 }
 
 static nkf_char
-broken_ungetc(nkf_char c, FILE *f)
+broken_ungetc(nkf_char c, ARG_UNUSED FILE *f) /* queue c for broken_getc(); f is not used; always returns c */
 {
-    if (broken_state.count < 2)
-       push_broken_buf(c);
+    if (nkf_buf_length(nkf_state->broken_buf) < 2) /* c is silently dropped when two or more characters are already queued */
+       nkf_buf_push(nkf_state->broken_buf, c);
     return c;
 }
 
@@ -3365,6 +3767,40 @@ eol_conv(nkf_char c2, nkf_char c1)
     else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
 }
 
+static void
+put_newline(void (*func)(nkf_char)) /* emit the configured end-of-line sequence through the one-argument callback func */
+{
+    switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) { /* DEFAULT_NEWLINE is used when eolmode_f is zero */
+      case CRLF:
+       (*func)(0x0D);
+       (*func)(0x0A);
+       break;
+      case CR:
+       (*func)(0x0D);
+       break;
+      case LF:
+       (*func)(0x0A);
+       break;
+    } /* no default case: any other mode value emits nothing */
+}
+
+static void
+oconv_newline(void (*func)(nkf_char, nkf_char)) /* emit the configured end-of-line sequence through the two-argument callback func */
+{
+    switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) { /* DEFAULT_NEWLINE is used when eolmode_f is zero */
+      case CRLF:
+       (*func)(0, 0x0D); /* the first argument is always 0 */
+       (*func)(0, 0x0A);
+       break;
+      case CR:
+       (*func)(0, 0x0D);
+       break;
+      case LF:
+       (*func)(0, 0x0A);
+       break;
+    } /* no default case: any other mode value emits nothing */
+}
+
 /*
    Return value of fold_conv()
 
@@ -3411,8 +3847,8 @@ fold_conv(nkf_char c2, nkf_char c1)
            f_prev = c1;
            f_line = 0;
            fold_state =  CR;
-       } else if ((f_prev == c1 && !fold_preserve_f)
-                  || (f_prev == LF && fold_preserve_f)
+       } else if ((f_prev == c1)
+                  || (f_prev == LF)
                  ) {        /* duplicate newline */
            if (f_line) {
                f_line = 0;
@@ -3441,9 +3877,7 @@ fold_conv(nkf_char c2, nkf_char c1)
        f_prev = LF;
        f_line = 0;
        fold_state =  LF;            /* output newline and clear */
-    } else if ( (c2==0  && c1==SP)||
-              (c2==0  && c1==TAB)||
-              (c2=='!'&& c1=='!')) {
+    } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
        /* X0208 kankaku or ascii space */
        if (f_prev == SP) {
            fold_state = 0;         /* remove duplicate spaces */
@@ -3461,7 +3895,7 @@ fold_conv(nkf_char c2, nkf_char c1)
        f_prev = c1;
        if (c2 || c2 == JIS_X_0201_1976_K)
            f_prev |= 0x80;  /* this is Japanese */
-       f_line += char_size(c2,c1);
+       f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
        if (f_line<=fold_len) {   /* normal case */
            fold_state = 1;
        } else {
@@ -3539,13 +3973,13 @@ fold_conv(nkf_char c2, nkf_char c1)
     /* terminator process */
     switch(fold_state) {
     case LF:
-       OCONV_NEWLINE((*o_fconv));
+       oconv_newline(o_fconv);
        (*o_fconv)(c2,c1);
        break;
     case 0:
        return;
     case CR:
-       OCONV_NEWLINE((*o_fconv));
+       oconv_newline(o_fconv);
        break;
     case TAB:
     case SP:
@@ -3580,13 +4014,17 @@ z_conv(nkf_char c2, nkf_char c1)
                    z_prev2 = 0;
                    (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
                    return;
+               } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) {  /* \e$BH>ByE@\e(B */
+                   z_prev2 = 0;
+                   (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
+                   return;
                }
            }
            z_prev2 = 0;
            (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
        }
        if (c2 == JIS_X_0201_1976_K) {
-           if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
+           if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
                /* wait for \e$BByE@\e(B or \e$BH>ByE@\e(B */
                z_prev1 = c1;
                z_prev2 = c2;
@@ -3694,8 +4132,8 @@ z_conv(nkf_char c2, nkf_char c1)
                0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
                0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
                0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
-               0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
-               0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+               0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
+               0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
            };
            if (fullwidth_to_halfwidth[c1-0x20]){
                c2 = fullwidth_to_halfwidth[c1-0x20];
@@ -3705,6 +4143,10 @@ z_conv(nkf_char c2, nkf_char c1)
                }
                return;
            }
+       } else if (c2 == 0 && nkf_char_unicode_p(c1) &&
+           ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* \e$B9g@.MQByE@!&H>ByE@\e(B */
+           (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
+           return;
        }
     }
     (*o_zconv)(c2,c1);
@@ -3832,6 +4274,7 @@ static const unsigned char *mime_pattern[] = {
     (const unsigned char *)"\075?ISO-8859-1?Q?",
     (const unsigned char *)"\075?ISO-8859-1?B?",
     (const unsigned char *)"\075?ISO-2022-JP?B?",
+    (const unsigned char *)"\075?ISO-2022-JP?B?",
     (const unsigned char *)"\075?ISO-2022-JP?Q?",
 #if defined(UTF8_INPUT_ENABLE)
     (const unsigned char *)"\075?UTF-8?B?",
@@ -3844,7 +4287,7 @@ static const unsigned char *mime_pattern[] = {
 
 /* Markers used to raise the priority of the matching code */
 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
-    e_iconv, s_iconv, 0, 0, 0, 0,
+    e_iconv, s_iconv, 0, 0, 0, 0, 0,
 #if defined(UTF8_INPUT_ENABLE)
     w_iconv, w_iconv,
 #endif
@@ -3852,7 +4295,7 @@ nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
 };
 
 static const nkf_char mime_encode[] = {
-    EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
+    EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
 #if defined(UTF8_INPUT_ENABLE)
     UTF_8, UTF_8,
 #endif
@@ -3861,7 +4304,7 @@ static const nkf_char mime_encode[] = {
 };
 
 static const nkf_char mime_encode_method[] = {
-    'B', 'B','Q', 'B', 'B', 'Q',
+    'B', 'B','Q', 'B', 'B', 'B', 'Q',
 #if defined(UTF8_INPUT_ENABLE)
     'B', 'Q',
 #endif
@@ -3892,7 +4335,7 @@ mime_input_buf_unshift(nkf_char c)
 }
 
 static nkf_char
-mime_ungetc(nkf_char c, FILE *f)
+mime_ungetc(nkf_char c, ARG_UNUSED FILE *f) /* hand c to mime_input_buf_unshift(); f is not used; always returns c */
 {
     mime_input_buf_unshift(c);
     return c;
 }
@@ -3911,7 +4354,7 @@ mime_ungetc_buf(nkf_char c, FILE *f)
 static nkf_char
 mime_getc_buf(FILE *f)
 {
-    /* we don't keep eof of mime_input_buf, becase it contains ?= as
+    /* we don't keep eof of mime_input_buf, because it contains ?= as
        a terminator. It was checked in mime_integrity. */
     return ((mimebuf_f)?
            (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
@@ -4037,7 +4480,7 @@ mime_begin_strict(FILE *f)
 static nkf_char
 mime_begin(FILE *f)
 {
-    nkf_char c1;
+    nkf_char c1 = 0;
     int i,k;
 
     /* In NONSTRICT mode, only =? is checked. In case of failure, we  */
@@ -4093,7 +4536,7 @@ mime_begin(FILE *f)
 
 #ifdef CHECK_OPTION
 static void
-no_putc(nkf_char c)
+no_putc(ARG_UNUSED nkf_char c) /* output callback that discards its argument */
 {
     ;
 }
@@ -4130,7 +4573,9 @@ get_guessed_code(void)
            if (p->score & (SCORE_DEPEND|SCORE_CP932))
                input_codename = "CP932";
        } else if (strcmp(input_codename, "EUC-JP") == 0) {
-           if (p->score & (SCORE_X0212))
+           if (p->score & SCORE_X0213)
+               input_codename = "EUC-JIS-2004";
+           else if (p->score & (SCORE_X0212))
                input_codename = "EUCJP-MS";
            else if (p->score & (SCORE_DEPEND|SCORE_CP932))
                input_codename = "CP51932";
@@ -4156,8 +4601,13 @@ print_guessed_code(char *filename)
        if (guess_f == 1) {
            printf("%s\n", input_codename);
        } else {
-           printf("%s%s\n",
+           printf("%s%s%s%s\n",
                   input_codename,
+                  iconv != w_iconv16 && iconv != w_iconv32 ? "" :
+                  input_endian == ENDIAN_LITTLE ? " LE" :
+                  input_endian == ENDIAN_BIG ? " BE" :
+                  "[BUG]",
+                  input_bom_f ? " (BOM)" : "",
                   input_eol == CR   ? " (CR)" :
                   input_eol == LF   ? " (LF)" :
                   input_eol == CRLF ? " (CRLF)" :
@@ -4225,7 +4675,7 @@ numchar_getc(FILE *f)
     nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
     int i = 0, j;
     nkf_char buf[12];
-    long c = -1;
+    nkf_char c = -1;
 
     buf[i] = (*g)(f);
     if (buf[i] == '&'){
@@ -4286,14 +4736,14 @@ nfc_getc(FILE *f)
 {
     nkf_char (*g)(FILE *f) = i_nfc_getc;
     nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
-    nkf_buf_t *buf = nkf_buf_new(9);
+    nkf_buf_t *buf = nkf_state->nfc_buf;
     const unsigned char *array;
     int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
     nkf_char c = (*g)(f);
 
     if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
 
-    nkf_buf_push(buf, (unsigned char)c);
+    nkf_buf_push(buf, c);
     do {
        while (lower <= upper) {
            int mid = (lower+upper) / 2;
@@ -4329,7 +4779,6 @@ nfc_getc(FILE *f)
 
     while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
     c = nkf_buf_pop(buf);
-    nkf_buf_dispose(buf);
 
     return c;
 }
@@ -4411,7 +4860,7 @@ mime_getc(FILE *f)
                case LF:
                case CR:
                    if (c1==LF) {
-                       if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
+                       if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                            i_ungetc(SP,f);
                            continue;
                        } else {
@@ -4420,7 +4869,7 @@ mime_getc(FILE *f)
                        c1 = LF;
                    } else {
                        if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
-                           if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
+                           if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                                i_ungetc(SP,f);
                                continue;
                            } else {
@@ -4456,7 +4905,7 @@ mime_getc(FILE *f)
        }
        if (c1=='='&&c2<SP) { /* this is soft wrap */
            while((c1 =  (*i_mgetc)(f)) <=SP) {
-               if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
+               if (c1 == EOF) return (EOF);
            }
            mime_decode_mode = 'Q'; /* still in MIME */
            goto restart_mime_q;
@@ -4510,7 +4959,7 @@ mime_getc(FILE *f)
            case LF:
            case CR:
                if (c1==LF) {
-                   if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
+                   if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                        i_ungetc(SP,f);
                        continue;
                    } else {
@@ -4522,7 +4971,7 @@ mime_getc(FILE *f)
                        if (c1==SP) {
                            i_ungetc(SP,f);
                            continue;
-                       } else if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
+                       } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
                            i_ungetc(SP,f);
                            continue;
                        } else {
@@ -4602,9 +5051,8 @@ static const char basis_64[] =
 
 #define MIMEOUT_BUF_LENGTH 74
 static struct {
-    char buf[MIMEOUT_BUF_LENGTH+1];
+    unsigned char buf[MIMEOUT_BUF_LENGTH+1];
     int count;
-    nkf_char state;
 } mimeout_state;
 
 /*nkf_char mime_lastchar2, mime_lastchar1;*/
@@ -4629,18 +5077,15 @@ open_mime(nkf_char mode)
            (*o_mputc)(mimeout_state.buf[i]);
            i++;
        }
-       PUT_NEWLINE((*o_mputc));
+       put_newline(o_mputc);
        (*o_mputc)(SP);
        base64_count = 1;
-       if (mimeout_state.count>0
-           && (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
-               || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)) {
+       if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
            i++;
        }
     }
     for (;i<mimeout_state.count;i++) {
-       if (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
-           || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) {
+       if (nkf_isspace(mimeout_state.buf[i])) {
            (*o_mputc)(mimeout_state.buf[i]);
            base64_count ++;
        } else {
@@ -4665,14 +5110,14 @@ mime_prechar(nkf_char c2, nkf_char c1)
        if (c2 == EOF){
            if (base64_count + mimeout_state.count/3*4> 73){
                (*o_base64conv)(EOF,0);
-               OCONV_NEWLINE((*o_base64conv));
+               oconv_newline(o_base64conv);
                (*o_base64conv)(0,SP);
                base64_count = 1;
            }
        } else {
-           if (base64_count + mimeout_state.count/3*4> 66) {
+           if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
                (*o_base64conv)(EOF,0);
-               OCONV_NEWLINE((*o_base64conv));
+               oconv_newline(o_base64conv);
                (*o_base64conv)(0,SP);
                base64_count = 1;
                mimeout_mode = -1;
@@ -4683,7 +5128,7 @@ mime_prechar(nkf_char c2, nkf_char c1)
            mimeout_mode =  (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
            open_mime(output_mode);
            (*o_base64conv)(EOF,0);
-           OCONV_NEWLINE((*o_base64conv));
+           oconv_newline(o_base64conv);
            (*o_base64conv)(0,SP);
            base64_count = 1;
            mimeout_mode = -1;
@@ -4708,13 +5153,13 @@ eof_mime(void)
     case 'B':
        break;
     case 2:
-       (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4)]);
+       (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
        (*o_mputc)('=');
        (*o_mputc)('=');
        base64_count += 3;
        break;
     case 1:
-       (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2)]);
+       (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
        (*o_mputc)('=');
        base64_count += 2;
        break;
@@ -4746,19 +5191,19 @@ mimeout_addchar(nkf_char c)
        }
        break;
     case 'B':
-       mimeout_state.state=c;
+       nkf_state->mimeout_state=c;
        (*o_mputc)(basis_64[c>>2]);
        mimeout_mode=2;
        base64_count ++;
        break;
     case 2:
-       (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
-       mimeout_state.state=c;
+       (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
+       nkf_state->mimeout_state=c;
        mimeout_mode=1;
        base64_count ++;
        break;
     case 1:
-       (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2) | ((c & 0xC0) >>6)]);
+       (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
        (*o_mputc)(basis_64[c & 0x3F]);
        mimeout_mode='B';
        base64_count += 2;
@@ -4781,14 +5226,14 @@ mime_putc(nkf_char c)
            if (base64_count > 71){
                if (c!=CR && c!=LF) {
                    (*o_mputc)('=');
-                   PUT_NEWLINE((*o_mputc));
+                   put_newline(o_mputc);
                }
                base64_count = 0;
            }
        }else{
            if (base64_count > 71){
                eof_mime();
-               PUT_NEWLINE((*o_mputc));
+               put_newline(o_mputc);
                base64_count = 0;
            }
            if (c == EOF) { /* c==EOF */
@@ -4850,7 +5295,7 @@ mime_putc(nkf_char c)
            } else if (c <= SP) {
                close_mime();
                if (base64_count > 70) {
-                   PUT_NEWLINE((*o_mputc));
+                   put_newline(o_mputc);
                    base64_count = 0;
                }
                if (!nkf_isblank(c)) {
@@ -4860,7 +5305,7 @@ mime_putc(nkf_char c)
            } else {
                if (base64_count > 70) {
                    close_mime();
-                   PUT_NEWLINE((*o_mputc));
+                   put_newline(o_mputc);
                    (*o_mputc)(SP);
                    base64_count = 1;
                    open_mime(output_mode);
@@ -4870,14 +5315,17 @@ mime_putc(nkf_char c)
                    return;
                }
            }
-           (*o_mputc)(c);
-           base64_count++;
+           if (c != 0x1B) {
+               (*o_mputc)(c);
+               base64_count++;
+               return;
+           }
        }
-       return;
     }
 
     if (mimeout_mode <= 0) {
-       if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
+       if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
+                   output_mode == UTF_8)) {
            if (nkf_isspace(c)) {
                int flag = 0;
                if (mimeout_mode == -1) {
@@ -4915,14 +5363,14 @@ mime_putc(nkf_char c)
                    i = 0;
 
                    for (; i < mimeout_state.count - len; ++i) {
-                       if (!strncmp(mimeout_state.buf+i, str, len)) {
+                       if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
                            i += len - 2;
                            break;
                        }
                    }
 
                    if (i == 0 || i == mimeout_state.count - len) {
-                       PUT_NEWLINE((*o_mputc));
+                       put_newline(o_mputc);
                        base64_count = 0;
                        if (!nkf_isspace(mimeout_state.buf[0])){
                            (*o_mputc)(SP);
@@ -4934,7 +5382,7 @@ mime_putc(nkf_char c)
                        for (j = 0; j <= i; ++j) {
                            (*o_mputc)(mimeout_state.buf[j]);
                        }
-                       PUT_NEWLINE((*o_mputc));
+                       put_newline(o_mputc);
                        base64_count = 1;
                        for (; j <= mimeout_state.count; ++j) {
                            mimeout_state.buf[j - i] = mimeout_state.buf[j];
@@ -4968,14 +5416,15 @@ mime_putc(nkf_char c)
        }
     }else{
        /* mimeout_mode == 'B', 1, 2 */
-       if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
+       if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
+                   output_mode == UTF_8)) {
            if (lastchar == CR || lastchar == LF){
                if (nkf_isblank(c)) {
                    for (i=0;i<mimeout_state.count;i++) {
                        mimeout_addchar(mimeout_state.buf[i]);
                    }
                    mimeout_state.count = 0;
-               } else if (SP<c && c<DEL) {
+               } else {
                    eof_mime();
                    for (i=0;i<mimeout_state.count;i++) {
                        (*o_mputc)(mimeout_state.buf[i]);
@@ -4986,7 +5435,7 @@ mime_putc(nkf_char c)
                mimeout_state.buf[mimeout_state.count++] = (char)c;
                return;
            }
-           if (c==SP || c==TAB || c==CR || c==LF) {
+           if (nkf_isspace(c)) {
                for (i=0;i<mimeout_state.count;i++) {
                    if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
                        eof_mime();
@@ -5000,8 +5449,8 @@ mime_putc(nkf_char c)
                mimeout_state.buf[mimeout_state.count++] = (char)c;
                if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
                    eof_mime();
-                   for (i=0;i<mimeout_state.count;i++) {
-                       (*o_mputc)(mimeout_state.buf[i]);
+                   for (j=0;j<mimeout_state.count;j++) {
+                       (*o_mputc)(mimeout_state.buf[j]);
                        base64_count++;
                    }
                    mimeout_state.count = 0;
@@ -5055,7 +5504,7 @@ typedef struct nkf_iconv_t {
     size_t input_buffer_size;
     char *output_buffer;
     size_t output_buffer_size;
-}
+};
 
 static nkf_iconv_t
 nkf_iconv_new(char *tocode, char *fromcode)
@@ -5161,7 +5610,7 @@ reinit(void)
     broken_f = FALSE;
     iso8859_f = FALSE;
     mimeout_f = FALSE;
-    x0201_f = X0201_DEFAULT;
+    x0201_f = NKF_UNSPECIFIED;
     iso2022jp_f = FALSE;
 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
     ms_ucs_map_f = UCS_MAP_ASCII;
@@ -5244,7 +5693,6 @@ reinit(void)
     input_eol = 0;
     prev_cr = 0;
     option_mode = 0;
-    init_broken_state();
     z_prev2=0,z_prev1=0;
 #ifdef CHECK_OPTION
     iconv_for_check = 0;
@@ -5252,6 +5700,7 @@ reinit(void)
     input_codename = NULL;
     input_encoding = NULL;
     output_encoding = NULL;
+    nkf_state_init();
 #ifdef WIN32DLL
     reinitdll();
 #endif /*WIN32DLL*/
@@ -5271,10 +5720,16 @@ module_connection(void)
     set_output_encoding(output_encoding);
     oconv = nkf_enc_to_oconv(output_encoding);
     o_putc = std_putc;
+    if (nkf_enc_unicode_p(output_encoding))
+       output_mode = UTF_8;
 
-    /* replace continucation module, from output side */
+    if (x0201_f == NKF_UNSPECIFIED) {
+       x0201_f = X0201_DEFAULT;
+    }
+
+    /* replace continuation module, from output side */
 
-    /* output redicrection */
+    /* output redirection */
 #ifdef CHECK_OPTION
     if (noout_f || guess_f){
        o_putc = no_putc;
@@ -5311,7 +5766,7 @@ module_connection(void)
 
     i_getc = std_getc;
     i_ungetc = std_ungetc;
-    /* input redicrection */
+    /* input redirection */
 #ifdef INPUT_OPTION
     if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
@@ -5379,7 +5834,7 @@ noconvert(FILE *f)
 #define NEXT continue        /* no output, get next */
 #define SKIP c2=0;continue        /* no output, get next */
 #define MORE c2=c1;continue  /* need one more byte */
-#define SEND ;               /* output c1 and c2, get next */
+#define SEND (void)0         /* output c1 and c2, get next */
 #define LAST break           /* end of loop, go closing  */
 #define set_input_mode(mode) do { \
     input_mode = mode; \
@@ -5417,20 +5872,48 @@ kanji_convert(FILE *f)
               (c2 = (*i_getc)(f)) != EOF &&
               (c3 = (*i_getc)(f)) != EOF &&
               (c4 = (*i_getc)(f)) != EOF) {
-           nkf_iconv_utf_32(c1, c2, c3, c4);
+           nkf_char c5, c6, c7, c8;
+           if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
+               if ((c5 = (*i_getc)(f)) != EOF &&
+                   (c6 = (*i_getc)(f)) != EOF &&
+                   (c7 = (*i_getc)(f)) != EOF &&
+                   (c8 = (*i_getc)(f)) != EOF) {
+                   if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
+                       (*i_ungetc)(c8, f);
+                       (*i_ungetc)(c7, f);
+                       (*i_ungetc)(c6, f);
+                       (*i_ungetc)(c5, f);
+                       nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
+                   }
+               } else {
+                   nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
+               }
+           }
        }
-       (*i_ungetc)(EOF, f);
+       goto finished;
     }
     else if (iconv == w_iconv16) {
        while ((c1 = (*i_getc)(f)) != EOF &&
               (c2 = (*i_getc)(f)) != EOF) {
-           if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
+           size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
+           if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
                (c3 = (*i_getc)(f)) != EOF &&
                (c4 = (*i_getc)(f)) != EOF) {
                nkf_iconv_utf_16(c1, c2, c3, c4);
+           } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
+               if ((c3 = (*i_getc)(f)) != EOF &&
+                   (c4 = (*i_getc)(f)) != EOF) {
+                   if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
+                       (*i_ungetc)(c4, f);
+                       (*i_ungetc)(c3, f);
+                       nkf_iconv_utf_16_nocombine(c1, c2);
+                   }
+               } else {
+                   nkf_iconv_utf_16_nocombine(c1, c2);
+               }
            }
        }
-       (*i_ungetc)(EOF, f);
+       goto finished;
     }
 #endif
 
@@ -5441,11 +5924,11 @@ kanji_convert(FILE *f)
            code_status(c1);
        if (c2) {
            /* second byte */
-           if (c2 > DEL) {
+           if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
                /* in case of 8th bit is on */
                if (!estab_f&&!mime_decode_mode) {
                    /* in case of not established yet */
-                   /* It is still ambiguious */
+                   /* It is still ambiguous */
                    if (h_conv(f, c2, c1)==EOF) {
                        LAST;
                    }
@@ -5477,6 +5960,12 @@ kanji_convert(FILE *f)
            if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
                /* CP5022x */
                MORE;
+           }else if (input_codename && input_codename[0] == 'I' &&
+                   0xA1 <= c1 && c1 <= 0xDF) {
+               /* JIS X 0201 Katakana in 8bit JIS */
+               c2 = JIS_X_0201_1976_K;
+               c1 &= 0x7f;
+               SEND;
            } else if (c1 > DEL) {
                /* 8 bit code */
                if (!estab_f && !iso8859_f) {
@@ -5551,7 +6040,7 @@ kanji_convert(FILE *f)
                SKIP;
            } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                if ((c1 = (*i_getc)(f)) == EOF) {
-                   /*  (*oconv)(0, ESC); don't send bogus code */
+                   (*oconv)(0, ESC);
                    LAST;
                }
                else if (c1 == '&') {
@@ -5627,6 +6116,7 @@ kanji_convert(FILE *f)
                    else if (c1 == 'I') {
                        /* JIS X 0201 Katakana */
                        set_input_mode(JIS_X_0201_1976_K);
+                       shift_mode = 1;
                        SKIP;
                    }
                    else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
@@ -5674,14 +6164,15 @@ kanji_convert(FILE *f)
                    }
                }
                else {
+                   i_ungetc(c1,f);
                    /* lonely ESC  */
                    (*oconv)(0, ESC);
-                   SEND;
+                   SKIP;
                }
            } else if (c1 == ESC && iconv == s_iconv) {
                /* ESC in Shift_JIS */
                if ((c1 = (*i_getc)(f)) == EOF) {
-                   /*  (*oconv)(0, ESC); don't send bogus code */
+                   (*oconv)(0, ESC);
                    LAST;
                } else if (c1 == '$') {
                    /* J-PHONE emoji */
@@ -5713,9 +6204,10 @@ kanji_convert(FILE *f)
                    }
                }
                else {
+                   i_ungetc(c1,f);
                    /* lonely ESC  */
                    (*oconv)(0, ESC);
-                   SEND;
+                   SKIP;
                }
            } else if (c1 == LF || c1 == CR) {
                if (broken_f&4) {
@@ -5768,11 +6260,52 @@ kanji_convert(FILE *f)
                    }
                }
                break;
+           case -3:
+               /* 4 bytes UTF-8 (check combining character) */
+               if ((c3 = (*i_getc)(f)) != EOF) {
+                   if ((c4 = (*i_getc)(f)) != EOF) {
+                       if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
+                           (*i_ungetc)(c4, f);
+                           (*i_ungetc)(c3, f);
+                           w_iconv_nocombine(c2, c1, 0);
+                       }
+                   } else {
+                       (*i_ungetc)(c3, f);
+                       w_iconv_nocombine(c2, c1, 0);
+                   }
+               } else {
+                   w_iconv_nocombine(c2, c1, 0);
+               }
+               break;
            case -1:
                /* 3 bytes EUC or UTF-8 */
                if ((c3 = (*i_getc)(f)) != EOF) {
                    code_status(c3);
-                   (*iconv)(c2, c1, c3);
+                   if ((*iconv)(c2, c1, c3) == -3) {
+                       /* 6 bytes UTF-8 (check combining character) */
+                       nkf_char c5, c6;
+                       if ((c4 = (*i_getc)(f)) != EOF) {
+                           if ((c5 = (*i_getc)(f)) != EOF) {
+                               if ((c6 = (*i_getc)(f)) != EOF) {
+                                   if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
+                                       (*i_ungetc)(c6, f);
+                                       (*i_ungetc)(c5, f);
+                                       (*i_ungetc)(c4, f);
+                                       w_iconv_nocombine(c2, c1, c3);
+                                   }
+                               } else {
+                                   (*i_ungetc)(c5, f);
+                                   (*i_ungetc)(c4, f);
+                                   w_iconv_nocombine(c2, c1, c3);
+                               }
+                           } else {
+                               (*i_ungetc)(c4, f);
+                               w_iconv_nocombine(c2, c1, c3);
+                           }
+                       } else {
+                           w_iconv_nocombine(c2, c1, c3);
+                       }
+                   }
                }
                break;
            }
@@ -5806,6 +6339,7 @@ kanji_convert(FILE *f)
        /* goto next_word */
     }
 
+finished:
     /* epilogue */
     (*iconv)(EOF, 0, 0);
     if (!input_codename)
@@ -5828,7 +6362,7 @@ kanji_convert(FILE *f)
 
 /*
  * int options(unsigned char *cp)
- * 
+ *
  * return values:
  *    0: success
  *   -1: ArgumentError
@@ -5857,7 +6391,7 @@ options(unsigned char *cp)
                option_mode = 1;
                return 0;
            }
-           for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
+           for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
                p = (unsigned char *)long_option[i].name;
                for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
                if (*p == cp[j] || cp[j] == SP){
@@ -5877,6 +6411,12 @@ options(unsigned char *cp)
                cp_back = cp;
                cp = (unsigned char *)long_option[i].alias;
            }else{
+#ifndef PERL_XS
+               if (strcmp(long_option[i].name, "help") == 0){
+                   usage();
+                   exit(EXIT_SUCCESS);
+               }
+#endif
                if (strcmp(long_option[i].name, "ic=") == 0){
                    enc = nkf_enc_find((char *)p);
                    if (!enc) continue;
@@ -6115,7 +6655,7 @@ options(unsigned char *cp)
            output_encoding = nkf_enc_from_index(EUCJP_NKF);
            continue;
        case 's':           /* SJIS output */
-           output_encoding = nkf_enc_from_index(WINDOWS_31J);
+           output_encoding = nkf_enc_from_index(SHIFT_JIS);
            continue;
        case 'l':           /* ISO8859 Latin-1 support, no conversion */
            iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
@@ -6125,7 +6665,8 @@ options(unsigned char *cp)
            if (*cp=='@'||*cp=='B')
                kanji_intro = *cp++;
            continue;
-       case 'o':           /* ASCII IN ESC-(-J/B */
+       case 'o':           /* ASCII IN ESC-(-J/B/H */
+           /* ESC ( H was used in initial JUNET messages */
            if (*cp=='J'||*cp=='B'||*cp=='H')
                ascii_intro = *cp++;
            continue;
@@ -6150,15 +6691,15 @@ options(unsigned char *cp)
 #ifndef PERL_XS
        case 'V':
            show_configuration();
-           exit(1);
+           exit(EXIT_SUCCESS);
            break;
        case 'v':
-           usage();
-           exit(1);
+           version();
+           exit(EXIT_SUCCESS);
            break;
 #endif
 #ifdef UTF8_OUTPUT_ENABLE
-       case 'w':           /* UTF-8 output */
+       case 'w':           /* UTF-{8,16,32} output */
            if (cp[0] == '8') {
                cp++;
                if (cp[0] == '0'){
@@ -6183,19 +6724,18 @@ options(unsigned char *cp)
                if (cp[0]=='L') {
                    cp++;
                    output_endian = ENDIAN_LITTLE;
+                   output_bom_f = TRUE;
                } else if (cp[0] == 'B') {
                    cp++;
-               } else {
-                   output_encoding = nkf_enc_from_index(enc_idx);
-                   continue;
+                   output_bom_f = TRUE;
                }
                if (cp[0] == '0'){
+                   output_bom_f = FALSE;
                    cp++;
                    enc_idx = enc_idx == UTF_16
                        ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
                        : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
                } else {
-                   output_bom_f = TRUE;
                    enc_idx = enc_idx == UTF_16
                        ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
                        : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
@@ -6244,8 +6784,8 @@ options(unsigned char *cp)
        case 'E':   /* EUC-JP input */
            input_encoding = nkf_enc_from_index(EUCJP_NKF);
            continue;
-       case 'S':   /* Windows-31J input */
-           input_encoding = nkf_enc_from_index(WINDOWS_31J);
+       case 'S':   /* Shift_JIS input */
+           input_encoding = nkf_enc_from_index(SHIFT_JIS);
            continue;
        case 'Z':   /* Convert X0208 alphabet to asii */
            /* alpha_f
@@ -6255,10 +6795,10 @@ options(unsigned char *cp)
               bit:3   Convert HTML Entity
               bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
             */
-           while ('0'<= *cp && *cp <='9') {
+           while ('0'<= *cp && *cp <='4') {
                alpha_f |= 1 << (*cp++ - '0');
            }
-           if (!alpha_f) alpha_f = 1;
+           alpha_f |= 1;
            continue;
        case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
            x0201_f = FALSE;    /* No X0201->X0208 conversion */
@@ -6372,7 +6912,7 @@ options(unsigned char *cp)
            continue;
 #endif
        case SP:
-           /* module muliple options in a string are allowed for Perl moudle  */
+           /* module multiple options in a string are allowed for Perl module  */
            while(*cp && *cp++!='-');
            continue;
        default:
@@ -6405,6 +6945,8 @@ main(int argc, char **argv)
 #ifdef DEFAULT_CODE_LOCALE
     setlocale(LC_CTYPE, "");
 #endif
+    nkf_state_init();
+
     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
        cp = (unsigned char *)*argv;
        options(cp);