OSDN Git Service

* add preparative code for iconv support.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * nkf is currently maintained at SourceForge.
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 #define NKF_IDENT "$Id: nkf.c,v 1.167 2008/01/23 09:21:39 naruse Exp $"
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2008-01-23"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "nkf.h"
42 #include "utf8tbl.h"
43
44 /* state of output_mode and input_mode
45
46    c2           0 means ASCII
47                 JIS_X_0201
48                 ISO_8859_1
49                 JIS_X_0208
50                 EOF      all termination
51    c1           32bit data
52
53  */
54
55 /* MIME ENCODE */
56
57 #define         FIXED_MIME      7
58 #define         STRICT_MIME     8
59
60 /* byte order */
61 enum byte_order {
62     ENDIAN_BIG    = 1,
63     ENDIAN_LITTLE = 2,
64     ENDIAN_2143   = 3,
65     ENDIAN_3412   = 4
66 };
67
68 /* ASCII CODE */
69
70 #define         BS      0x08
71 #define         TAB     0x09
72 #define         LF      0x0a
73 #define         CR      0x0d
74 #define         ESC     0x1b
75 #define         SP      0x20
76 #define         AT      0x40
77 #define         SSP     0xa0
78 #define         DEL     0x7f
79 #define         SI      0x0f
80 #define         SO      0x0e
81 #define         SSO     0x8e
82 #define         SS3     0x8f
83 #define         CRLF    0x0D0A
84
85
86 /* encodings */
87
88 enum nkf_encodings {
89     ASCII,
90     ISO_8859_1,
91     ISO_2022_JP,
92     CP50220,
93     CP50221,
94     CP50222,
95     ISO_2022_JP_1,
96     ISO_2022_JP_3,
97     SHIFT_JIS,
98     WINDOWS_31J,
99     CP10001,
100     EUC_JP,
101     CP51932,
102     EUCJP_MS,
103     EUCJP_ASCII,
104     SHIFT_JISX0213,
105     SHIFT_JIS_2004,
106     EUC_JISX0213,
107     EUC_JIS_2004,
108     UTF_8,
109     UTF_8N,
110     UTF_8_BOM,
111     UTF8_MAC,
112     UTF_16,
113     UTF_16BE,
114     UTF_16BE_BOM,
115     UTF_16LE,
116     UTF_16LE_BOM,
117     UTF_32,
118     UTF_32BE,
119     UTF_32BE_BOM,
120     UTF_32LE,
121     UTF_32LE_BOM,
122     NKF_ENCODING_TABLE_SIZE,
123     JIS_X_0201=0x1000,
124     JIS_X_0208=0x1001,
125     JIS_X_0212=0x1002,
126     JIS_X_0213_1=0x1003,
127     JIS_X_0213_2=0x1004,
128     BINARY
129 };
130
131 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
132 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
133 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
134 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
135 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
136 void j_oconv(nkf_char c2, nkf_char c1);
137 void s_oconv(nkf_char c2, nkf_char c1);
138 void e_oconv(nkf_char c2, nkf_char c1);
139 void w_oconv(nkf_char c2, nkf_char c1);
140 void w_oconv16(nkf_char c2, nkf_char c1);
141 void w_oconv32(nkf_char c2, nkf_char c1);
142
143 typedef struct {
144     const char *name;
145     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
146     void (*oconv)(nkf_char c2, nkf_char c1);
147 } nkf_native_encoding;
148
149 nkf_native_encoding NkfEncodingASCII =          { "ASCII", e_iconv, e_oconv };
150 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
151 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
152 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
153 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
154 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
155 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
156
157 typedef struct {
158     const int id;
159     const char *name;
160     const nkf_native_encoding *base_encoding;
161 } nkf_encoding;
162
163 nkf_encoding nkf_encoding_table[] = {
164     {ASCII,             "US-ASCII",             &NkfEncodingASCII},
165     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
166     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
167     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
168     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
169     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
170     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
171     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
172     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
173     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
174     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
175     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
176     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
177     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
178     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
179     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
180     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
181     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
182     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
183     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
184     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
185     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
186     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
187     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
188     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
189     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
190     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
191     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
192     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
193     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
194     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
195     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
196     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
197     {BINARY,            "BINARY",               &NkfEncodingASCII},
198     {-1,                NULL,                   NULL}
199 };
200
201 struct {
202     const char *name;
203     const int id;
204 } encoding_name_to_id_table[] = {
205     {"US-ASCII",                ASCII},
206     {"ASCII",                   ASCII},
207     {"ISO-2022-JP",             ISO_2022_JP},
208     {"ISO2022JP-CP932",         CP50220},
209     {"CP50220",                 CP50220},
210     {"CP50221",                 CP50221},
211     {"CP50222",                 CP50222},
212     {"ISO-2022-JP-1",           ISO_2022_JP_1},
213     {"ISO-2022-JP-3",           ISO_2022_JP_3},
214     {"SHIFT_JIS",               SHIFT_JIS},
215     {"SJIS",                    SHIFT_JIS},
216     {"WINDOWS-31J",             WINDOWS_31J},
217     {"CSWINDOWS31J",            WINDOWS_31J},
218     {"CP932",                   WINDOWS_31J},
219     {"MS932",                   WINDOWS_31J},
220     {"CP10001",                 CP10001},
221     {"EUCJP",                   EUC_JP},
222     {"EUC-JP",                  EUC_JP},
223     {"CP51932",                 CP51932},
224     {"EUC-JP-MS",               EUCJP_MS},
225     {"EUCJP-MS",                EUCJP_MS},
226     {"EUCJPMS",                 EUCJP_MS},
227     {"EUC-JP-ASCII",            EUCJP_ASCII},
228     {"EUCJP-ASCII",             EUCJP_ASCII},
229     {"SHIFT_JISX0213",          SHIFT_JISX0213},
230     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
231     {"EUC-JISX0213",            EUC_JISX0213},
232     {"EUC-JIS-2004",            EUC_JIS_2004},
233     {"UTF-8",                   UTF_8},
234     {"UTF-8N",                  UTF_8N},
235     {"UTF-8-BOM",               UTF_8_BOM},
236     {"UTF8-MAC",                UTF8_MAC},
237     {"UTF-8-MAC",               UTF8_MAC},
238     {"UTF-16",                  UTF_16},
239     {"UTF-16BE",                UTF_16BE},
240     {"UTF-16BE-BOM",            UTF_16BE_BOM},
241     {"UTF-16LE",                UTF_16LE},
242     {"UTF-16LE-BOM",            UTF_16LE_BOM},
243     {"UTF-32",                  UTF_32},
244     {"UTF-32BE",                UTF_32BE},
245     {"UTF-32BE-BOM",            UTF_32BE_BOM},
246     {"UTF-32LE",                UTF_32LE},
247     {"UTF-32LE-BOM",            UTF_32LE_BOM},
248     {"BINARY",                  BINARY},
249     {NULL,                      -1}
250 };
251
252 #if defined(DEFAULT_CODE_JIS)
253 #define     DEFAULT_ENCIDX ISO_2022_JP
254 #elif defined(DEFAULT_CODE_SJIS)
255 #define     DEFAULT_ENCIDX SHIFT_JIS
256 #elif defined(DEFAULT_CODE_EUC)
257 #define     DEFAULT_ENCIDX EUC_JP
258 #elif defined(DEFAULT_CODE_UTF8)
259 #define     DEFAULT_ENCIDX UTF_8
260 #endif
261
262
/*
 * Locale-independent ASCII character classification and conversion macros.
 * Every use of an argument is parenthesized so that compound expressions
 * (e.g. `x | 0x30`) expand with the intended precedence.  Arguments may be
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<= (c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* value of a hexadecimal digit character; 0 for anything that is not one */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* uppercase hexadecimal digit for the low 4 bits of c */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
287
288 #define         HOLD_SIZE       1024
289 #if defined(INT_IS_SHORT)
290 #define         IOBUF_SIZE      2048
291 #else
292 #define         IOBUF_SIZE      16384
293 #endif
294
295 #define         DEFAULT_J       'B'
296 #define         DEFAULT_R       'B'
297
298
299 #define         RANGE_NUM_MAX   18
300 #define         GETA1   0x22
301 #define         GETA2   0x2e
302
303
304 /* MIME preprocessor */
305
306 #ifdef EASYWIN /*Easy Win */
307 extern POINT _BufferSize;
308 #endif
309
310 struct input_code{
311     char *name;
312     nkf_char stat;
313     nkf_char score;
314     nkf_char index;
315     nkf_char buf[3];
316     void (*status_func)(struct input_code *, nkf_char);
317     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
318     int _file_stat;
319 };
320
321 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
322 static nkf_encoding *input_encoding = NULL;
323 static nkf_encoding *output_encoding = NULL;
324 static void set_output_encoding(nkf_encoding *enc);
325
326 #if !defined(PERL_XS) && !defined(WIN32DLL)
327 static  nkf_char     noconvert(FILE *f);
328 #endif
329 static  void    module_connection(void);
330 static  nkf_char     kanji_convert(FILE *f);
331 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
332 static  nkf_char     push_hold_buf(nkf_char c2);
333 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
334 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
335 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
336 /* UCS Mapping
337  * 0: Shift_JIS, eucJP-ascii
338  * 1: eucJP-ms
339  * 2: CP932, CP51932
340  * 3: CP10001
341  */
342 #define UCS_MAP_ASCII   0
343 #define UCS_MAP_MS      1
344 #define UCS_MAP_CP932   2
345 #define UCS_MAP_CP10001 3
346 static int ms_ucs_map_f = UCS_MAP_ASCII;
347 #endif
348 #ifdef UTF8_INPUT_ENABLE
349 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
350 static  int     no_cp932ext_f = FALSE;
351 /* ignore ZERO WIDTH NO-BREAK SPACE */
352 static  int     no_best_fit_chars_f = FALSE;
353 static  int     input_endian = ENDIAN_BIG;
354 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
355 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
356 static  void    encode_fallback_html(nkf_char c);
357 static  void    encode_fallback_xml(nkf_char c);
358 static  void    encode_fallback_java(nkf_char c);
359 static  void    encode_fallback_perl(nkf_char c);
360 static  void    encode_fallback_subchar(nkf_char c);
361 static  void    (*encode_fallback)(nkf_char c) = NULL;
362 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
363 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
364 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
365 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
366 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
367 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
368 static  void    w_status(struct input_code *, nkf_char);
369 #endif
370 #ifdef UTF8_OUTPUT_ENABLE
371 static  int     output_bom_f = FALSE;
372 static  int     output_endian = ENDIAN_BIG;
373 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
374 #endif
375 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
376 static  void    fold_conv(nkf_char c2,nkf_char c1);
377 static  void    nl_conv(nkf_char c2,nkf_char c1);
378 static  void    z_conv(nkf_char c2,nkf_char c1);
379 static  void    rot_conv(nkf_char c2,nkf_char c1);
380 static  void    hira_conv(nkf_char c2,nkf_char c1);
381 static  void    base64_conv(nkf_char c2,nkf_char c1);
382 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
383 static  void    no_connection(nkf_char c2,nkf_char c1);
384 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
385
386 static  void    code_score(struct input_code *ptr);
387 static  void    code_status(nkf_char c);
388
389 static  void    std_putc(nkf_char c);
390 static  nkf_char     std_getc(FILE *f);
391 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
392
393 static  nkf_char     broken_getc(FILE *f);
394 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
395
396 static  nkf_char     mime_begin(FILE *f);
397 static  nkf_char     mime_getc(FILE *f);
398 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
399
400 static  void    switch_mime_getc(void);
401 static  void    unswitch_mime_getc(void);
402 static  nkf_char     mime_begin_strict(FILE *f);
403 static  nkf_char     mime_getc_buf(FILE *f);
404 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
405 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
406
407 static  nkf_char     base64decode(nkf_char c);
408 static  void    mime_prechar(nkf_char c2, nkf_char c1);
409 static  void    mime_putc(nkf_char c);
410 static  void    open_mime(nkf_char c);
411 static  void    close_mime(void);
412 static  void    eof_mime(void);
413 static  void    mimeout_addchar(nkf_char c);
414 #ifndef PERL_XS
415 static  void    usage(void);
416 static  void    show_configuration(void);
417 #endif
418 static  void    options(unsigned char *c);
419 static  void    reinit(void);
420
421 /* buffers */
422
423 #if !defined(PERL_XS) && !defined(WIN32DLL)
424 static unsigned char   stdibuf[IOBUF_SIZE];
425 static unsigned char   stdobuf[IOBUF_SIZE];
426 #endif
427 static unsigned char   hold_buf[HOLD_SIZE*2];
428 static int             hold_count = 0;
429
430 /* MIME preprocessor fifo */
431
432 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
433 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
434 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
435 static unsigned char           mime_buf[MIME_BUF_SIZE];
436 static unsigned int            mime_top = 0;
437 static unsigned int            mime_last = 0;  /* decoded */
438 static unsigned int            mime_input = 0; /* undecoded */
439 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
440
441 /* flags */
442 static int             unbuf_f = FALSE;
443 static int             estab_f = FALSE;
444 static int             nop_f = FALSE;
445 static int             binmode_f = TRUE;       /* binary mode */
446 static int             rot_f = FALSE;          /* rot14/43 mode */
447 static int             hira_f = FALSE;          /* hira/kata henkan */
448 static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
449 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
450 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
451 static int             mimebuf_f = FALSE;      /* MIME buffered input */
452 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
453 static int             iso8859_f = FALSE;      /* ISO8859 through */
454 static int             mimeout_f = FALSE;       /* base64 mode */
455 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
456 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
457
458 #ifdef UNICODE_NORMALIZATION
459 static int nfc_f = FALSE;
460 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
461 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
462 static nkf_char nfc_getc(FILE *f);
463 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
464 #endif
465
466 #ifdef INPUT_OPTION
467 static int cap_f = FALSE;
468 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
469 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
470 static nkf_char cap_getc(FILE *f);
471 static nkf_char cap_ungetc(nkf_char c,FILE *f);
472
473 static int url_f = FALSE;
474 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
475 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
476 static nkf_char url_getc(FILE *f);
477 static nkf_char url_ungetc(nkf_char c,FILE *f);
478 #endif
479
480 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
481 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
482 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
483 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
484 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
485 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
486 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
487
488 #ifdef NUMCHAR_OPTION
489 static int numchar_f = FALSE;
490 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
491 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
492 static nkf_char numchar_getc(FILE *f);
493 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
494 #endif
495
496 #ifdef CHECK_OPTION
497 static int noout_f = FALSE;
498 static void no_putc(nkf_char c);
499 static int debug_f = FALSE;
500 static void debug(const char *str);
501 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
502 #endif
503
504 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
505 #if !defined PERL_XS
506 static  void    print_guessed_code(char *filename);
507 #endif
508 static  void    set_input_codename(char *codename);
509
510 #ifdef EXEC_IO
511 static int exec_f = 0;
512 #endif
513
514 #ifdef SHIFTJIS_CP932
515 /* invert IBM extended characters to others */
516 static int cp51932_f = FALSE;
517
518 /* invert NEC-selected IBM extended characters to IBM extended characters */
519 static int cp932inv_f = TRUE;
520
521 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
522 #endif /* SHIFTJIS_CP932 */
523
524 #ifdef X0212_ENABLE
525 static int x0212_f = FALSE;
526 static nkf_char x0212_shift(nkf_char c);
527 static nkf_char x0212_unshift(nkf_char c);
528 #endif
529 static int x0213_f = FALSE;
530
531 static unsigned char prefix_table[256];
532
533 static void set_code_score(struct input_code *ptr, nkf_char score);
534 static void clr_code_score(struct input_code *ptr, nkf_char score);
535 static void status_disable(struct input_code *ptr);
536 static void status_push_ch(struct input_code *ptr, nkf_char c);
537 static void status_clear(struct input_code *ptr);
538 static void status_reset(struct input_code *ptr);
539 static void status_reinit(struct input_code *ptr);
540 static void status_check(struct input_code *ptr, nkf_char c);
541 static void e_status(struct input_code *, nkf_char);
542 static void s_status(struct input_code *, nkf_char);
543
544 struct input_code input_code_list[] = {
545     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
546     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
547 #ifdef UTF8_INPUT_ENABLE
548     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
549     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
550     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
551 #endif
552     {0}
553 };
554
555 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
556 static int              base64_count = 0;
557
558 /* X0208 -> ASCII converter */
559
560 /* fold parameter */
561 static int             f_line = 0;    /* chars in line */
562 static int             f_prev = 0;
563 static int             fold_preserve_f = FALSE; /* preserve new lines */
564 static int             fold_f  = FALSE;
565 static int             fold_len  = 0;
566
567 /* options */
568 static unsigned char   kanji_intro = DEFAULT_J;
569 static unsigned char   ascii_intro = DEFAULT_R;
570
571 /* Folding */
572
573 #define FOLD_MARGIN  10
574 #define DEFAULT_FOLD 60
575
576 static int             fold_margin  = FOLD_MARGIN;
577
578 /* process default */
579 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
580 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
581
582 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
587 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
588 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
589
590 /* static redirections */
591
592 static  void   (*o_putc)(nkf_char c) = std_putc;
593
594 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
595 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
596
597 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
598 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
599
600 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
601
602 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
603 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
604
605 /* for strict mime */
606 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
607 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
608
609 /* Global states */
610 static int output_mode = ASCII,    /* output kanji mode */
611            input_mode =  ASCII,    /* input kanji mode */
612            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
613 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
614
615 /* X0201 / X0208 conversion tables */
616
617 /* X0201 kana conversion table */
618 /* 90-9F A0-DF */
619 static const unsigned char cv[]= {
620     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
621     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
622     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
623     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
624     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
625     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
626     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
627     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
628     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
629     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
630     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
631     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
632     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
633     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
634     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
635     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
636     0x00,0x00};
637
638
639 /* X0201 kana conversion table for dakuten (voiced sound mark) */
640 /* 90-9F A0-DF */
641 static const unsigned char dv[]= {
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
647     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
648     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
649     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
650     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
651     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
653     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
654     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
657     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658     0x00,0x00};
659
660 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
661 /* 90-9F A0-DF */
662 static const unsigned char ev[]= {
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
674     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
678     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
679     0x00,0x00};
680
681
682 /* X0208 kigou conversion table */
683 /* 0x8140 - 0x819e */
684 static const unsigned char fv[] = {
685
686     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
687     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
688     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
689     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
690     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
691     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
692     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
694     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
696     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
697     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
698 } ;
699
700
701
702 static int             file_out_f = FALSE;
703 #ifdef OVERWRITE
704 static int             overwrite_f = FALSE;
705 static int             preserve_time_f = FALSE;
706 static int             backup_f = FALSE;
707 static char            *backup_suffix = "";
708 static char *get_backup_filename(const char *suffix, const char *filename);
709 #endif
710
711 static int nlmode_f = 0;   /* CR, LF, CRLF */
712 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
713 static nkf_char prev_cr = 0; /* CR or 0 */
714 #ifdef EASYWIN /*Easy Win */
715 static int             end_check;
716 #endif /*Easy Win */
717
718 #define STD_GC_BUFSIZE (256)
719 nkf_char std_gc_buf[STD_GC_BUFSIZE];
720 nkf_char std_gc_ndx;
721
/*
 * Return a newly malloc'ed copy of STR (including its terminating NUL).
 * If the allocation fails, the failure is reported with perror(str) and
 * the string literal "" is returned instead; that result is not heap
 * memory.
 */
char* nkf_strcpy(const char *str)
{
    size_t size = strlen(str) + 1;
    char *copy = malloc(size);

    if (copy != NULL) {
        memcpy(copy, str, size);
        return copy;
    }
    perror(str);
    return "";
}
732
/*
 * Copy SRC into DEST, converting ASCII lowercase letters to uppercase
 * with nkf_toupper.  At most LENGTH characters are copied, stopping early
 * at SRC's terminating NUL.  A NUL is always stored after the copied
 * characters, so DEST must have room for LENGTH + 1 bytes.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    /* size_t index matches LENGTH's type: no signed/unsigned comparison
     * and no int overflow when LENGTH exceeds INT_MAX. */
    size_t i = 0;
    for (; i < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
741
742 static nkf_encoding *nkf_enc_from_index(int idx)
743 {
744     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
745         return 0;
746     }
747     return &nkf_encoding_table[idx];
748 }
749
750 static int nkf_enc_find_index(const char *name)
751 {
752     int i, index = -1;
753     if (*name == 'X' && *(name+1) == '-') name += 2;
754     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
755         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
756             return encoding_name_to_id_table[i].id;
757         }
758     }
759     return index;
760 }
761
762 static nkf_encoding *nkf_enc_find(const char *name)
763 {
764     int idx = -1;
765     idx = nkf_enc_find_index(name);
766     if (idx < 0) return 0;
767     return nkf_enc_from_index(idx);
768 }
769
/*
 * Accessors and predicates for nkf_encoding entries.  Each replacement
 * list is wrapped in parentheses so the expansion always acts as a single
 * operand in the surrounding expression.
 */
#define nkf_enc_name(enc) ((enc)->name)
#define nkf_enc_to_index(enc) ((enc)->id)
#define nkf_enc_to_base_encoding(enc) ((enc)->base_encoding)
#define nkf_enc_to_iconv(enc) (nkf_enc_to_base_encoding(enc)->iconv)
#define nkf_enc_to_oconv(enc) (nkf_enc_to_base_encoding(enc)->oconv)
/* true when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP */
#define nkf_enc_asciicompat(enc) (\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* true when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 families */
#define nkf_enc_unicode_p(enc) (\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
    nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* true when the encoding id is CP50220, CP50221 or CP50222 */
#define nkf_enc_cp5022x_p(enc) (\
    nkf_enc_to_index(enc) == CP50220 ||\
    nkf_enc_to_index(enc) == CP50221 ||\
    nkf_enc_to_index(enc) == CP50222)
786
787 #ifndef DEFAULT_ENCIDX
/*
 * Return the name of the character set of the current locale, or NULL
 * when the platform offers no way to query it.
 *
 *   HAVE_LANGINFO_H : the string returned by nl_langinfo(CODESET).
 *   __WIN32__       : "CP" followed by the ANSI code page number reported
 *                     by GetACP(), built in a static buffer (overwritten
 *                     by the next call).
 *
 * The caller must not free the returned string.
 */
static char* nkf_locale_charmap()
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    /* "CP" + up to 10 decimal digits of a 32-bit code page + NUL fits */
    static char charmap[16];
    sprintf(charmap, "CP%u", (unsigned int)GetACP());
    return charmap;
#else
    return NULL;
#endif
}
798
799 static nkf_encoding* nkf_locale_encoding()
800 {
801     nkf_encoding *enc = 0;
802     char *encname = nkf_locale_charmap();
803     if (encname)
804         enc = nkf_enc_find(encname);
805     if (enc < 0) enc = 0;
806     return enc;
807 }
808 #endif
809
810 static nkf_encoding* nkf_default_encoding()
811 {
812 #ifdef DEFAULT_ENCIDX
813     return nkf_enc_from_index(DEFAULT_ENCIDX);
814 #else
815     nkf_encoding *enc = nkf_locale_encoding();
816     if (enc <= 0) enc = nkf_enc_from_index(ISO_2022_JP);
817     return enc;
818 #endif
819 }
820
821 #ifdef WIN32DLL
822 #include "nkf32dll.c"
823 #elif defined(PERL_XS)
824 #else /* WIN32DLL */
825 int main(int argc, char **argv)
826 {
827     FILE  *fin;
828     unsigned char  *cp;
829
830     char *outfname = NULL;
831     char *origfname;
832
833 #ifdef EASYWIN /*Easy Win */
834     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
835 #endif
836     setlocale(LC_CTYPE, "");
837
838     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
839         cp = (unsigned char *)*argv;
840         options(cp);
841 #ifdef EXEC_IO
842         if (exec_f){
843             int fds[2], pid;
844             if (pipe(fds) < 0 || (pid = fork()) < 0){
845                 abort();
846             }
847             if (pid == 0){
848                 if (exec_f > 0){
849                     close(fds[0]);
850                     dup2(fds[1], 1);
851                 }else{
852                     close(fds[1]);
853                     dup2(fds[0], 0);
854                 }
855                 execvp(argv[1], &argv[1]);
856             }
857             if (exec_f > 0){
858                 close(fds[1]);
859                 dup2(fds[0], 0);
860             }else{
861                 close(fds[0]);
862                 dup2(fds[1], 1);
863             }
864             argc = 0;
865             break;
866         }
867 #endif
868     }
869
870     if (guess_f) {
871 #ifdef CHECK_OPTION
872         int debug_f_back = debug_f;
873 #endif
874 #ifdef EXEC_IO
875         int exec_f_back = exec_f;
876 #endif
877 #ifdef X0212_ENABLE
878         int x0212_f_back = x0212_f;
879 #endif
880         int x0213_f_back = x0213_f;
881         int guess_f_back = guess_f;
882         reinit();
883         guess_f = guess_f_back;
884         mime_f = FALSE;
885 #ifdef CHECK_OPTION
886         debug_f = debug_f_back;
887 #endif
888 #ifdef EXEC_IO
889         exec_f = exec_f_back;
890 #endif
891 #ifdef X0212_ENABLE
892         x0212_f = x0212_f_back;
893 #endif
894         x0213_f = x0213_f_back;
895     }
896
897     if (binmode_f == TRUE)
898 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
899     if (freopen("","wb",stdout) == NULL)
900         return (-1);
901 #else
902     setbinmode(stdout);
903 #endif
904
905     if (unbuf_f)
906       setbuf(stdout, (char *) NULL);
907     else
908       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
909
910     if (argc == 0) {
911       if (binmode_f == TRUE)
912 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
913       if (freopen("","rb",stdin) == NULL) return (-1);
914 #else
915       setbinmode(stdin);
916 #endif
917       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
918       if (nop_f)
919           noconvert(stdin);
920       else {
921           kanji_convert(stdin);
922           if (guess_f) print_guessed_code(NULL);
923       }
924     } else {
925       int nfiles = argc;
926         int is_argument_error = FALSE;
927       while (argc--) {
928             input_codename = NULL;
929             input_newline = 0;
930 #ifdef CHECK_OPTION
931             iconv_for_check = 0;
932 #endif
933           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
934                 perror(*(argv-1));
935                 is_argument_error = TRUE;
936                 continue;
937           } else {
938 #ifdef OVERWRITE
939               int fd = 0;
940               int fd_backup = 0;
941 #endif
942
943 /* reopen file for stdout */
944               if (file_out_f == TRUE) {
945 #ifdef OVERWRITE
946                   if (overwrite_f){
947                       outfname = malloc(strlen(origfname)
948                                         + strlen(".nkftmpXXXXXX")
949                                         + 1);
950                       if (!outfname){
951                           perror(origfname);
952                           return -1;
953                       }
954                       strcpy(outfname, origfname);
955 #ifdef MSDOS
956                       {
957                           int i;
958                           for (i = strlen(outfname); i; --i){
959                               if (outfname[i - 1] == '/'
960                                   || outfname[i - 1] == '\\'){
961                                   break;
962                               }
963                           }
964                           outfname[i] = '\0';
965                       }
966                       strcat(outfname, "ntXXXXXX");
967                       mktemp(outfname);
968                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
969                                 S_IREAD | S_IWRITE);
970 #else
971                       strcat(outfname, ".nkftmpXXXXXX");
972                       fd = mkstemp(outfname);
973 #endif
974                       if (fd < 0
975                           || (fd_backup = dup(fileno(stdout))) < 0
976                           || dup2(fd, fileno(stdout)) < 0
977                           ){
978                           perror(origfname);
979                           return -1;
980                       }
981                   }else
982 #endif
983                   if(argc == 1) {
984                       outfname = *argv++;
985                       argc--;
986                   } else {
987                       outfname = "nkf.out";
988                   }
989
990                   if(freopen(outfname, "w", stdout) == NULL) {
991                       perror (outfname);
992                       return (-1);
993                   }
994                   if (binmode_f == TRUE) {
995 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
996                       if (freopen("","wb",stdout) == NULL)
997                            return (-1);
998 #else
999                       setbinmode(stdout);
1000 #endif
1001                   }
1002               }
1003               if (binmode_f == TRUE)
1004 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1005                  if (freopen("","rb",fin) == NULL)
1006                     return (-1);
1007 #else
1008                  setbinmode(fin);
1009 #endif
1010               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1011               if (nop_f)
1012                   noconvert(fin);
1013               else {
1014                   char *filename = NULL;
1015                   kanji_convert(fin);
1016                   if (nfiles > 1) filename = origfname;
1017                   if (guess_f) print_guessed_code(filename);
1018               }
1019               fclose(fin);
1020 #ifdef OVERWRITE
1021               if (overwrite_f) {
1022                   struct stat     sb;
1023 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1024                   time_t tb[2];
1025 #else
1026                   struct utimbuf  tb;
1027 #endif
1028
1029                   fflush(stdout);
1030                   close(fd);
1031                   if (dup2(fd_backup, fileno(stdout)) < 0){
1032                       perror("dup2");
1033                   }
1034                   if (stat(origfname, &sb)) {
1035                       fprintf(stderr, "Can't stat %s\n", origfname);
1036                   }
1037                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1038                   if (chmod(outfname, sb.st_mode)) {
1039                       fprintf(stderr, "Can't set permission %s\n", outfname);
1040                   }
1041
1042                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1043                     if(preserve_time_f){
1044 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1045                         tb[0] = tb[1] = sb.st_mtime;
1046                         if (utime(outfname, tb)) {
1047                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1048                         }
1049 #else
1050                         tb.actime  = sb.st_atime;
1051                         tb.modtime = sb.st_mtime;
1052                         if (utime(outfname, &tb)) {
1053                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1054                         }
1055 #endif
1056                     }
1057                     if(backup_f){
1058                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1059 #ifdef MSDOS
1060                         unlink(backup_filename);
1061 #endif
1062                         if (rename(origfname, backup_filename)) {
1063                             perror(backup_filename);
1064                             fprintf(stderr, "Can't rename %s to %s\n",
1065                                     origfname, backup_filename);
1066                         }
1067                     }else{
1068 #ifdef MSDOS
1069                         if (unlink(origfname)){
1070                             perror(origfname);
1071                         }
1072 #endif
1073                     }
1074                   if (rename(outfname, origfname)) {
1075                       perror(origfname);
1076                       fprintf(stderr, "Can't rename %s to %s\n",
1077                               outfname, origfname);
1078                   }
1079                   free(outfname);
1080               }
1081 #endif
1082           }
1083       }
1084         if (is_argument_error)
1085             return(-1);
1086     }
1087 #ifdef EASYWIN /*Easy Win */
1088     if (file_out_f == FALSE)
1089         scanf("%d",&end_check);
1090     else
1091         fclose(stdout);
1092 #else /* for Other OS */
1093     if (file_out_f == TRUE)
1094         fclose(stdout);
1095 #endif /*Easy Win */
1096     return (0);
1097 }
1098 #endif /* WIN32DLL */
1099
1100 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * If `suffix` contains one or more '*' characters it is used as a
 * template: every '*' is replaced by `filename` and all other characters
 * are copied unchanged.  Otherwise `suffix` is simply appended to
 * `filename`.
 *
 * Returns a malloc'ed string that the caller must free(), or NULL (after
 * reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t filename_length = strlen(filename);
    size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* every '*' (one byte) expands to a full copy of filename */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }

        for (i = 0, j = 0; suffix[i]; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* + 1 copies the suffix's terminating NUL as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1139 #endif
1140
/*
 * Table of the "--long" options understood by options().
 *
 *   name  : option text after the leading "--".  A trailing '=' marks an
 *           option that carries a value; options() compares only up to
 *           the '=' and treats what follows it as the value.
 *   alias : when non-empty, options() parses this string as if it had
 *           been given as short option letters.  When empty, options()
 *           handles the option itself by comparing `name`.
 *
 * Entries inside #ifdef blocks exist only when the corresponding feature
 * is compiled in.
 */
1141 static const struct {
1142     const char *name;
1143     const char *alias;
1144 } long_option[] = {
1145     {"ic=", ""},
1146     {"oc=", ""},
1147     {"base64","jMB"},
1148     {"euc","e"},
1149     {"euc-input","E"},
1150     {"fj","jm"},
1151     {"help","v"},
1152     {"jis","j"},
1153     {"jis-input","J"},
1154     {"mac","sLm"},
1155     {"mime","jM"},
1156     {"mime-input","m"},
1157     {"msdos","sLw"},
1158     {"sjis","s"},
1159     {"sjis-input","S"},
1160     {"unix","eLu"},
1161     {"version","V"},
1162     {"windows","sLw"},
1163     {"hiragana","h1"},
1164     {"katakana","h2"},
1165     {"katakana-hiragana","h3"},
1166     {"guess=", ""},
1167     {"guess", "g2"},
1168     {"cp932", ""},
1169     {"no-cp932", ""},
1170 #ifdef X0212_ENABLE
1171     {"x0212", ""},
1172 #endif
1173 #ifdef UTF8_OUTPUT_ENABLE
1174     {"utf8", "w"},
1175     {"utf16", "w16"},
1176     {"ms-ucs-map", ""},
1177     {"fb-skip", ""},
1178     {"fb-html", ""},
1179     {"fb-xml", ""},
1180     {"fb-perl", ""},
1181     {"fb-java", ""},
1182     {"fb-subchar", ""},
1183     {"fb-subchar=", ""},
1184 #endif
1185 #ifdef UTF8_INPUT_ENABLE
1186     {"utf8-input", "W"},
1187     {"utf16-input", "W16"},
1188     {"no-cp932ext", ""},
1189     {"no-best-fit-chars",""},
1190 #endif
1191 #ifdef UNICODE_NORMALIZATION
1192     {"utf8mac-input", ""},
1193 #endif
1194 #ifdef OVERWRITE
1195     {"overwrite", ""},
1196     {"overwrite=", ""},
1197     {"in-place", ""},
1198     {"in-place=", ""},
1199 #endif
1200 #ifdef INPUT_OPTION
1201     {"cap-input", ""},
1202     {"url-input", ""},
1203 #endif
1204 #ifdef NUMCHAR_OPTION
1205     {"numchar-input", ""},
1206 #endif
1207 #ifdef CHECK_OPTION
1208     {"no-output", ""},
1209     {"debug", ""},
1210 #endif
1211 #ifdef SHIFTJIS_CP932
1212     {"cp932inv", ""},
1213 #endif
1214 #ifdef EXEC_IO
1215     {"exec-in", ""},
1216     {"exec-out", ""},
1217 #endif
1218     {"prefix=", ""},
1219 };
1220
1221 static void set_input_encoding(nkf_encoding *enc)
1222 {
1223     switch (nkf_enc_to_index(enc)) {
1224     case CP50220:
1225     case CP50221:
1226     case CP50222:
1227 #ifdef SHIFTJIS_CP932
1228         cp51932_f = TRUE;
1229 #endif
1230 #ifdef UTF8_OUTPUT_ENABLE
1231         ms_ucs_map_f = UCS_MAP_CP932;
1232 #endif
1233         break;
1234     case ISO_2022_JP_1:
1235 #ifdef X0212_ENABLE
1236         x0212_f = TRUE;
1237 #endif
1238         break;
1239     case ISO_2022_JP_3:
1240 #ifdef X0212_ENABLE
1241         x0212_f = TRUE;
1242 #endif
1243         x0213_f = TRUE;
1244         break;
1245     case SHIFT_JIS:
1246         break;
1247     case WINDOWS_31J:
1248 #ifdef SHIFTJIS_CP932
1249         cp51932_f = TRUE;
1250 #endif
1251 #ifdef UTF8_OUTPUT_ENABLE
1252         ms_ucs_map_f = UCS_MAP_CP932;
1253 #endif
1254         break;
1255     case EUC_JP:
1256         break;
1257     case CP10001:
1258 #ifdef SHIFTJIS_CP932
1259         cp51932_f = TRUE;
1260 #endif
1261 #ifdef UTF8_OUTPUT_ENABLE
1262         ms_ucs_map_f = UCS_MAP_CP10001;
1263 #endif
1264         break;
1265     case CP51932:
1266 #ifdef SHIFTJIS_CP932
1267         cp51932_f = TRUE;
1268 #endif
1269 #ifdef UTF8_OUTPUT_ENABLE
1270         ms_ucs_map_f = UCS_MAP_CP932;
1271 #endif
1272         break;
1273     case EUCJP_MS:
1274 #ifdef SHIFTJIS_CP932
1275         cp51932_f = FALSE;
1276 #endif
1277 #ifdef UTF8_OUTPUT_ENABLE
1278         ms_ucs_map_f = UCS_MAP_MS;
1279 #endif
1280         break;
1281     case EUCJP_ASCII:
1282 #ifdef SHIFTJIS_CP932
1283         cp51932_f = FALSE;
1284 #endif
1285 #ifdef UTF8_OUTPUT_ENABLE
1286         ms_ucs_map_f = UCS_MAP_ASCII;
1287 #endif
1288         break;
1289     case SHIFT_JISX0213:
1290     case SHIFT_JIS_2004:
1291         x0213_f = TRUE;
1292 #ifdef SHIFTJIS_CP932
1293         cp51932_f = FALSE;
1294 #endif
1295         break;
1296     case EUC_JISX0213:
1297     case EUC_JIS_2004:
1298         x0213_f = TRUE;
1299 #ifdef SHIFTJIS_CP932
1300         cp51932_f = FALSE;
1301 #endif
1302         break;
1303 #ifdef UTF8_INPUT_ENABLE
1304 #ifdef UNICODE_NORMALIZATION
1305     case UTF8_MAC:
1306         nfc_f = TRUE;
1307         break;
1308 #endif
1309     case UTF_16:
1310     case UTF_16BE:
1311     case UTF_16BE_BOM:
1312         input_endian = ENDIAN_BIG;
1313         break;
1314     case UTF_16LE:
1315     case UTF_16LE_BOM:
1316         input_endian = ENDIAN_LITTLE;
1317         break;
1318     case UTF_32:
1319     case UTF_32BE:
1320     case UTF_32BE_BOM:
1321         input_endian = ENDIAN_BIG;
1322         break;
1323     case UTF_32LE:
1324     case UTF_32LE_BOM:
1325         input_endian = ENDIAN_LITTLE;
1326         break;
1327 #endif
1328     }
1329 }
1330
1331 static void set_output_encoding(nkf_encoding *enc)
1332 {
1333     switch (nkf_enc_to_index(enc)) {
1334     case CP50220:
1335         x0201_f = TRUE;
1336 #ifdef SHIFTJIS_CP932
1337         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1338 #endif
1339 #ifdef UTF8_OUTPUT_ENABLE
1340         ms_ucs_map_f = UCS_MAP_CP932;
1341 #endif
1342         break;
1343     case CP50221:
1344 #ifdef SHIFTJIS_CP932
1345         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1346 #endif
1347 #ifdef UTF8_OUTPUT_ENABLE
1348         ms_ucs_map_f = UCS_MAP_CP932;
1349 #endif
1350         break;
1351     case ISO_2022_JP_1:
1352 #ifdef X0212_ENABLE
1353         x0212_f = TRUE;
1354 #endif
1355 #ifdef SHIFTJIS_CP932
1356         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1357 #endif
1358         break;
1359     case ISO_2022_JP_3:
1360 #ifdef X0212_ENABLE
1361         x0212_f = TRUE;
1362 #endif
1363         x0213_f = TRUE;
1364 #ifdef SHIFTJIS_CP932
1365         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1366 #endif
1367         break;
1368     case SHIFT_JIS:
1369         break;
1370     case WINDOWS_31J:
1371 #ifdef UTF8_OUTPUT_ENABLE
1372         ms_ucs_map_f = UCS_MAP_CP932;
1373 #endif
1374         break;
1375     case CP10001:
1376 #ifdef UTF8_OUTPUT_ENABLE
1377         ms_ucs_map_f = UCS_MAP_CP10001;
1378 #endif
1379         break;
1380     case EUC_JP:
1381         x0212_f = TRUE;
1382 #ifdef SHIFTJIS_CP932
1383         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1384 #endif
1385 #ifdef UTF8_OUTPUT_ENABLE
1386         ms_ucs_map_f = UCS_MAP_CP932;
1387 #endif
1388         break;
1389     case CP51932:
1390 #ifdef SHIFTJIS_CP932
1391         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1392 #endif
1393 #ifdef UTF8_OUTPUT_ENABLE
1394         ms_ucs_map_f = UCS_MAP_CP932;
1395 #endif
1396         break;
1397     case EUCJP_MS:
1398 #ifdef X0212_ENABLE
1399         x0212_f = TRUE;
1400 #endif
1401 #ifdef UTF8_OUTPUT_ENABLE
1402         ms_ucs_map_f = UCS_MAP_MS;
1403 #endif
1404         break;
1405     case EUCJP_ASCII:
1406 #ifdef X0212_ENABLE
1407         x0212_f = TRUE;
1408 #endif
1409 #ifdef UTF8_OUTPUT_ENABLE
1410         ms_ucs_map_f = UCS_MAP_ASCII;
1411 #endif
1412         break;
1413     case SHIFT_JISX0213:
1414     case SHIFT_JIS_2004:
1415         x0213_f = TRUE;
1416 #ifdef SHIFTJIS_CP932
1417         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1418 #endif
1419         break;
1420     case EUC_JISX0213:
1421     case EUC_JIS_2004:
1422 #ifdef X0212_ENABLE
1423         x0212_f = TRUE;
1424 #endif
1425         x0213_f = TRUE;
1426 #ifdef SHIFTJIS_CP932
1427         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1428 #endif
1429         break;
1430 #ifdef UTF8_OUTPUT_ENABLE
1431     case UTF_8_BOM:
1432         output_bom_f = TRUE;
1433         break;
1434     case UTF_16:
1435     case UTF_16BE_BOM:
1436         output_bom_f = TRUE;
1437         break;
1438     case UTF_16LE:
1439         output_endian = ENDIAN_LITTLE;
1440         output_bom_f = FALSE;
1441         break;
1442     case UTF_16LE_BOM:
1443         output_endian = ENDIAN_LITTLE;
1444         output_bom_f = TRUE;
1445         break;
1446     case UTF_32BE_BOM:
1447         output_bom_f = TRUE;
1448         break;
1449     case UTF_32LE:
1450         output_endian = ENDIAN_LITTLE;
1451         output_bom_f = FALSE;
1452         break;
1453     case UTF_32LE_BOM:
1454         output_endian = ENDIAN_LITTLE;
1455         output_bom_f = TRUE;
1456         break;
1457 #endif
1458     }
1459 }
1460
1461 static int option_mode = 0;
1462
1463 void options(unsigned char *cp)
1464 {
1465     nkf_char i, j;
1466     unsigned char *p;
1467     unsigned char *cp_back = NULL;
1468     char codeset[32];
1469     nkf_encoding *enc;
1470
1471     if (option_mode==1)
1472         return;
1473     while(*cp && *cp++!='-');
1474     while (*cp || cp_back) {
1475         if(!*cp){
1476             cp = cp_back;
1477             cp_back = NULL;
1478             continue;
1479         }
1480         p = 0;
1481         switch (*cp++) {
1482         case '-':  /* literal options */
1483             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1484                 option_mode = 1;
1485                 return;
1486             }
1487             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1488                 p = (unsigned char *)long_option[i].name;
1489                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1490                 if (*p == cp[j] || cp[j] == SP){
1491                     p = &cp[j] + 1;
1492                     break;
1493                 }
1494                 p = 0;
1495             }
1496             if (p == 0) {
1497                 fprintf(stderr, "unknown long option: --%s\n", cp);
1498                 return;
1499             }
1500             while(*cp && *cp != SP && cp++);
1501             if (long_option[i].alias[0]){
1502                 cp_back = cp;
1503                 cp = (unsigned char *)long_option[i].alias;
1504             }else{
1505                 if (strcmp(long_option[i].name, "ic=") == 0){
1506                     nkf_str_upcase((char *)p, codeset, 32);
1507                     enc = nkf_enc_find(codeset);
1508                     if (!enc) continue;
1509                     input_encoding = enc;
1510                     continue;
1511                 }
1512                 if (strcmp(long_option[i].name, "oc=") == 0){
1513                     nkf_str_upcase((char *)p, codeset, 32);
1514                     enc = nkf_enc_find(codeset);
1515                     if (enc <= 0) continue;
1516                     output_encoding = enc;
1517                     continue;
1518                 }
1519                 if (strcmp(long_option[i].name, "guess=") == 0){
1520                     if (p[0] == '0' || p[0] == '1') {
1521                         guess_f = 1;
1522                     } else {
1523                         guess_f = 2;
1524                     }
1525                     continue;
1526                 }
1527 #ifdef OVERWRITE
1528                 if (strcmp(long_option[i].name, "overwrite") == 0){
1529                     file_out_f = TRUE;
1530                     overwrite_f = TRUE;
1531                     preserve_time_f = TRUE;
1532                     continue;
1533                 }
1534                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1535                     file_out_f = TRUE;
1536                     overwrite_f = TRUE;
1537                     preserve_time_f = TRUE;
1538                     backup_f = TRUE;
1539                     backup_suffix = malloc(strlen((char *) p) + 1);
1540                     strcpy(backup_suffix, (char *) p);
1541                     continue;
1542                 }
1543                 if (strcmp(long_option[i].name, "in-place") == 0){
1544                     file_out_f = TRUE;
1545                     overwrite_f = TRUE;
1546                     preserve_time_f = FALSE;
1547                     continue;
1548                 }
1549                 if (strcmp(long_option[i].name, "in-place=") == 0){
1550                     file_out_f = TRUE;
1551                     overwrite_f = TRUE;
1552                     preserve_time_f = FALSE;
1553                     backup_f = TRUE;
1554                     backup_suffix = malloc(strlen((char *) p) + 1);
1555                     strcpy(backup_suffix, (char *) p);
1556                     continue;
1557                 }
1558 #endif
1559 #ifdef INPUT_OPTION
1560                 if (strcmp(long_option[i].name, "cap-input") == 0){
1561                     cap_f = TRUE;
1562                     continue;
1563                 }
1564                 if (strcmp(long_option[i].name, "url-input") == 0){
1565                     url_f = TRUE;
1566                     continue;
1567                 }
1568 #endif
1569 #ifdef NUMCHAR_OPTION
1570                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1571                     numchar_f = TRUE;
1572                     continue;
1573                 }
1574 #endif
1575 #ifdef CHECK_OPTION
1576                 if (strcmp(long_option[i].name, "no-output") == 0){
1577                     noout_f = TRUE;
1578                     continue;
1579                 }
1580                 if (strcmp(long_option[i].name, "debug") == 0){
1581                     debug_f = TRUE;
1582                     continue;
1583                 }
1584 #endif
1585                 if (strcmp(long_option[i].name, "cp932") == 0){
1586 #ifdef SHIFTJIS_CP932
1587                     cp51932_f = TRUE;
1588                     cp932inv_f = -TRUE;
1589 #endif
1590 #ifdef UTF8_OUTPUT_ENABLE
1591                     ms_ucs_map_f = UCS_MAP_CP932;
1592 #endif
1593                     continue;
1594                 }
1595                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1596 #ifdef SHIFTJIS_CP932
1597                     cp51932_f = FALSE;
1598                     cp932inv_f = FALSE;
1599 #endif
1600 #ifdef UTF8_OUTPUT_ENABLE
1601                     ms_ucs_map_f = UCS_MAP_ASCII;
1602 #endif
1603                     continue;
1604                 }
1605 #ifdef SHIFTJIS_CP932
1606                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1607                     cp932inv_f = -TRUE;
1608                     continue;
1609                 }
1610 #endif
1611
1612 #ifdef X0212_ENABLE
1613                 if (strcmp(long_option[i].name, "x0212") == 0){
1614                     x0212_f = TRUE;
1615                     continue;
1616                 }
1617 #endif
1618
1619 #ifdef EXEC_IO
1620                   if (strcmp(long_option[i].name, "exec-in") == 0){
1621                       exec_f = 1;
1622                       return;
1623                   }
1624                   if (strcmp(long_option[i].name, "exec-out") == 0){
1625                       exec_f = -1;
1626                       return;
1627                   }
1628 #endif
1629 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1630                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1631                     no_cp932ext_f = TRUE;
1632                     continue;
1633                 }
1634                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1635                     no_best_fit_chars_f = TRUE;
1636                     continue;
1637                 }
1638                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1639                     encode_fallback = NULL;
1640                     continue;
1641                 }
1642                 if (strcmp(long_option[i].name, "fb-html") == 0){
1643                     encode_fallback = encode_fallback_html;
1644                     continue;
1645                 }
1646                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1647                     encode_fallback = encode_fallback_xml;
1648                     continue;
1649                 }
1650                 if (strcmp(long_option[i].name, "fb-java") == 0){
1651                     encode_fallback = encode_fallback_java;
1652                     continue;
1653                 }
1654                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1655                     encode_fallback = encode_fallback_perl;
1656                     continue;
1657                 }
1658                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1659                     encode_fallback = encode_fallback_subchar;
1660                     continue;
1661                 }
1662                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1663                     encode_fallback = encode_fallback_subchar;
1664                     unicode_subchar = 0;
1665                     if (p[0] != '0'){
1666                         /* decimal number */
1667                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1668                             unicode_subchar *= 10;
1669                             unicode_subchar += hex2bin(p[i]);
1670                         }
1671                     }else if(p[1] == 'x' || p[1] == 'X'){
1672                         /* hexadecimal number */
1673                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1674                             unicode_subchar <<= 4;
1675                             unicode_subchar |= hex2bin(p[i]);
1676                         }
1677                     }else{
1678                         /* octal number */
1679                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1680                             unicode_subchar *= 8;
1681                             unicode_subchar += hex2bin(p[i]);
1682                         }
1683                     }
1684                     w16e_conv(unicode_subchar, &i, &j);
1685                     unicode_subchar = i<<8 | j;
1686                     continue;
1687                 }
1688 #endif
1689 #ifdef UTF8_OUTPUT_ENABLE
1690                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1691                     ms_ucs_map_f = UCS_MAP_MS;
1692                     continue;
1693                 }
1694 #endif
1695 #ifdef UNICODE_NORMALIZATION
1696                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1697                     nfc_f = TRUE;
1698                     continue;
1699                 }
1700 #endif
1701                 if (strcmp(long_option[i].name, "prefix=") == 0){
1702                     if (nkf_isgraph(p[0])){
1703                         for (i = 1; nkf_isgraph(p[i]); i++){
1704                             prefix_table[p[i]] = p[0];
1705                         }
1706                     }
1707                     continue;
1708                 }
1709             }
1710             continue;
1711         case 'b':           /* buffered mode */
1712             unbuf_f = FALSE;
1713             continue;
1714         case 'u':           /* non bufferd mode */
1715             unbuf_f = TRUE;
1716             continue;
1717         case 't':           /* transparent mode */
1718             if (*cp=='1') {
1719                 /* alias of -t */
1720                 cp++;
1721                 nop_f = TRUE;
1722             } else if (*cp=='2') {
1723                 /*
1724                  * -t with put/get
1725                  *
1726                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1727                  *
1728                  */
1729                 cp++;
1730                 nop_f = 2;
1731             } else
1732                 nop_f = TRUE;
1733             continue;
1734         case 'j':           /* JIS output */
1735         case 'n':
1736             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1737             continue;
1738         case 'e':           /* AT&T EUC output */
1739             output_encoding = nkf_enc_from_index(EUC_JP);
1740             continue;
1741         case 's':           /* SJIS output */
1742             output_encoding = nkf_enc_from_index(WINDOWS_31J);
1743             continue;
1744         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1745             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1746             input_encoding = nkf_enc_from_index(ISO_8859_1);
1747             continue;
1748         case 'i':           /* Kanji IN ESC-$-@/B */
1749             if (*cp=='@'||*cp=='B')
1750                 kanji_intro = *cp++;
1751             continue;
1752         case 'o':           /* ASCII IN ESC-(-J/B */
1753             if (*cp=='J'||*cp=='B'||*cp=='H')
1754                 ascii_intro = *cp++;
1755             continue;
1756         case 'h':
1757             /*
1758                 bit:1   katakana->hiragana
1759                 bit:2   hiragana->katakana
1760             */
1761             if ('9'>= *cp && *cp>='0')
1762                 hira_f |= (*cp++ -'0');
1763             else
1764                 hira_f |= 1;
1765             continue;
1766         case 'r':
1767             rot_f = TRUE;
1768             continue;
1769 #if defined(MSDOS) || defined(__OS2__)
1770         case 'T':
1771             binmode_f = FALSE;
1772             continue;
1773 #endif
1774 #ifndef PERL_XS
1775         case 'V':
1776             show_configuration();
1777             exit(1);
1778             break;
1779         case 'v':
1780             usage();
1781             exit(1);
1782             break;
1783 #endif
1784 #ifdef UTF8_OUTPUT_ENABLE
1785         case 'w':           /* UTF-8 output */
1786             if (cp[0] == '8') {
1787                 cp++;
1788                 if (cp[0] == '0'){
1789                     cp++;
1790                     output_encoding = nkf_enc_from_index(UTF_8N);
1791                 } else {
1792                     output_bom_f = TRUE;
1793                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1794                 }
1795             } else {
1796                 int enc_idx;
1797                 if ('1'== cp[0] && '6'==cp[1]) {
1798                     cp += 2;
1799                     enc_idx = UTF_16;
1800                 } else if ('3'== cp[0] && '2'==cp[1]) {
1801                     cp += 2;
1802                     enc_idx = UTF_32;
1803                 } else {
1804                     output_encoding = nkf_enc_from_index(UTF_8);
1805                     continue;
1806                 }
1807                 if (cp[0]=='L') {
1808                     cp++;
1809                     output_endian = ENDIAN_LITTLE;
1810                 } else if (cp[0] == 'B') {
1811                     cp++;
1812                 } else {
1813                     output_encoding = nkf_enc_from_index(enc_idx);
1814                     continue;
1815                 }
1816                 if (cp[0] == '0'){
1817                     cp++;
1818                     enc_idx = enc_idx == UTF_16
1819                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1820                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1821                 } else {
1822                     output_bom_f = TRUE;
1823                     enc_idx = enc_idx == UTF_16
1824                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1825                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1826                 }
1827                 output_encoding = nkf_enc_from_index(enc_idx);
1828             }
1829             continue;
1830 #endif
1831 #ifdef UTF8_INPUT_ENABLE
1832         case 'W':           /* UTF input */
1833             if (cp[0] == '8') {
1834                 cp++;
1835                 input_encoding = nkf_enc_from_index(UTF_8);
1836             }else{
1837                 int enc_idx;
1838                 if ('1'== cp[0] && '6'==cp[1]) {
1839                     cp += 2;
1840                     input_endian = ENDIAN_BIG;
1841                     enc_idx = UTF_16;
1842                 } else if ('3'== cp[0] && '2'==cp[1]) {
1843                     cp += 2;
1844                     input_endian = ENDIAN_BIG;
1845                     enc_idx = UTF_32;
1846                 } else {
1847                     input_encoding = nkf_enc_from_index(UTF_8);
1848                     continue;
1849                 }
1850                 if (cp[0]=='L') {
1851                     cp++;
1852                     input_endian = ENDIAN_LITTLE;
1853                 } else if (cp[0] == 'B') {
1854                     cp++;
1855                     input_endian = ENDIAN_BIG;
1856                 }
1857                 enc_idx = enc_idx == UTF_16
1858                     ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1859                     : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1860                 input_encoding = nkf_enc_from_index(enc_idx);
1861             }
1862             continue;
1863 #endif
1864         /* Input code assumption */
1865         case 'J':   /* ISO-2022-JP input */
1866             input_encoding = nkf_enc_from_index(ISO_2022_JP);
1867             continue;
1868         case 'E':   /* EUC-JP input */
1869             input_encoding = nkf_enc_from_index(EUC_JP);
1870             continue;
1871         case 'S':   /* Windows-31J input */
1872             input_encoding = nkf_enc_from_index(WINDOWS_31J);
1873             continue;
1874         case 'Z':   /* Convert X0208 alphabet to asii */
1875             /* alpha_f
1876                bit:0   Convert JIS X 0208 Alphabet to ASCII
1877                bit:1   Convert Kankaku to one space
1878                bit:2   Convert Kankaku to two spaces
1879                bit:3   Convert HTML Entity
1880                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1881             */
1882             while ('0'<= *cp && *cp <='9') {
1883                 alpha_f |= 1 << (*cp++ - '0');
1884             }
1885             if (!alpha_f) alpha_f = 1;
1886             continue;
1887         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1888             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1889             /* accept  X0201
1890                     ESC-(-I     in JIS, EUC, MS Kanji
1891                     SI/SO       in JIS, EUC, MS Kanji
1892                     SSO         in EUC, JIS, not in MS Kanji
1893                     MS Kanji (0xa0-0xdf)
1894                output  X0201
1895                     ESC-(-I     in JIS (0x20-0x5f)
1896                     SSO         in EUC (0xa0-0xdf)
1897                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1898             */
1899             continue;
1900         case 'X':   /* Convert X0201 kana to X0208 */
1901             x0201_f = TRUE;
1902             continue;
1903         case 'F':   /* prserve new lines */
1904             fold_preserve_f = TRUE;
1905         case 'f':   /* folding -f60 or -f */
1906             fold_f = TRUE;
1907             fold_len = 0;
1908             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1909                 fold_len *= 10;
1910                 fold_len += *cp++ - '0';
1911             }
1912             if (!(0<fold_len && fold_len<BUFSIZ))
1913                 fold_len = DEFAULT_FOLD;
1914             if (*cp=='-') {
1915                 fold_margin = 0;
1916                 cp++;
1917                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1918                     fold_margin *= 10;
1919                     fold_margin += *cp++ - '0';
1920                 }
1921             }
1922             continue;
1923         case 'm':   /* MIME support */
1924             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1925             if (*cp=='B'||*cp=='Q') {
1926                 mime_decode_mode = *cp++;
1927                 mimebuf_f = FIXED_MIME;
1928             } else if (*cp=='N') {
1929                 mime_f = TRUE; cp++;
1930             } else if (*cp=='S') {
1931                 mime_f = STRICT_MIME; cp++;
1932             } else if (*cp=='0') {
1933                 mime_decode_f = FALSE;
1934                 mime_f = FALSE; cp++;
1935             } else {
1936                 mime_f = STRICT_MIME;
1937             }
1938             continue;
1939         case 'M':   /* MIME output */
1940             if (*cp=='B') {
1941                 mimeout_mode = 'B';
1942                 mimeout_f = FIXED_MIME; cp++;
1943             } else if (*cp=='Q') {
1944                 mimeout_mode = 'Q';
1945                 mimeout_f = FIXED_MIME; cp++;
1946             } else {
1947                 mimeout_f = TRUE;
1948             }
1949             continue;
1950         case 'B':   /* Broken JIS support */
1951             /*  bit:0   no ESC JIS
1952                 bit:1   allow any x on ESC-(-x or ESC-$-x
1953                 bit:2   reset to ascii on NL
1954             */
1955             if ('9'>= *cp && *cp>='0')
1956                 broken_f |= 1<<(*cp++ -'0');
1957             else
1958                 broken_f |= TRUE;
1959             continue;
1960 #ifndef PERL_XS
1961         case 'O':/* for Output file */
1962             file_out_f = TRUE;
1963             continue;
1964 #endif
1965         case 'c':/* add cr code */
1966             nlmode_f = CRLF;
1967             continue;
1968         case 'd':/* delete cr code */
1969             nlmode_f = LF;
1970             continue;
1971         case 'I':   /* ISO-2022-JP output */
1972             iso2022jp_f = TRUE;
1973             continue;
1974         case 'L':  /* line mode */
1975             if (*cp=='u') {         /* unix */
1976                 nlmode_f = LF; cp++;
1977             } else if (*cp=='m') { /* mac */
1978                 nlmode_f = CR; cp++;
1979             } else if (*cp=='w') { /* windows */
1980                 nlmode_f = CRLF; cp++;
1981             } else if (*cp=='0') { /* no conversion  */
1982                 nlmode_f = 0; cp++;
1983             }
1984             continue;
1985 #ifndef PERL_XS
1986         case 'g':
1987             if ('2' <= *cp && *cp <= '9') {
1988                 guess_f = 2;
1989                 cp++;
1990             } else if (*cp == '0' || *cp == '1') {
1991                 guess_f = 1;
1992                 cp++;
1993             } else {
1994                 guess_f = 1;
1995             }
1996             continue;
1997 #endif
1998         case SP:
1999         /* module muliple options in a string are allowed for Perl moudle  */
2000             while(*cp && *cp++!='-');
2001             continue;
2002         default:
2003             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2004             /* bogus option but ignored */
2005             continue;
2006         }
2007     }
2008 }
2009
2010 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2011 {
2012     if (iconv_func){
2013         struct input_code *p = input_code_list;
2014         while (p->name){
2015             if (iconv_func == p->iconv_func){
2016                 return p;
2017             }
2018             p++;
2019         }
2020     }
2021     return 0;
2022 }
2023
2024 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2025 {
2026 #ifdef INPUT_CODE_FIX
2027     if (f || !input_encoding)
2028 #endif
2029         if (estab_f != f){
2030             estab_f = f;
2031         }
2032
2033     if (iconv_func
2034 #ifdef INPUT_CODE_FIX
2035         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
2036 #endif
2037         ){
2038         iconv = iconv_func;
2039     }
2040 #ifdef CHECK_OPTION
2041     if (estab_f && iconv_for_check != iconv){
2042         struct input_code *p = find_inputcode_byfunc(iconv);
2043         if (p){
2044             set_input_codename(p->name);
2045             debug(p->name);
2046         }
2047         iconv_for_check = iconv;
2048     }
2049 #endif
2050 }
2051
/*
 * Score bits kept in input_code.score.  Every constant is a distinct bit,
 * so they can be combined with set_code_score() and removed again with
 * clr_code_score().
 */
#define SCORE_L2       (1)                   /* JIS level 2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* machine-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* replaced via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* character that does not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score tables indexed by the low nibble of the first byte; code_score()
 * uses score_table_A0 when (c2 & 0x70) == 0x20 and score_table_F0 when
 * (c2 & 0x70) == 0x70.
 *
 * The element type is unsigned char on purpose: SCORE_ERROR is 128, which
 * does not fit into a signed char.  With plain char the entry could turn
 * into -128 and sign-extend when OR-ed into the score, setting every
 * higher bit as well.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2076
2077 void set_code_score(struct input_code *ptr, nkf_char score)
2078 {
2079     if (ptr){
2080         ptr->score |= score;
2081     }
2082 }
2083
2084 void clr_code_score(struct input_code *ptr, nkf_char score)
2085 {
2086     if (ptr){
2087         ptr->score &= ~score;
2088     }
2089 }
2090
2091 void code_score(struct input_code *ptr)
2092 {
2093     nkf_char c2 = ptr->buf[0];
2094 #ifdef UTF8_OUTPUT_ENABLE
2095     nkf_char c1 = ptr->buf[1];
2096 #endif
2097     if (c2 < 0){
2098         set_code_score(ptr, SCORE_ERROR);
2099     }else if (c2 == SSO){
2100         set_code_score(ptr, SCORE_KANA);
2101     }else if (c2 == 0x8f){
2102         set_code_score(ptr, SCORE_X0212);
2103 #ifdef UTF8_OUTPUT_ENABLE
2104     }else if (!e2w_conv(c2, c1)){
2105         set_code_score(ptr, SCORE_NO_EXIST);
2106 #endif
2107     }else if ((c2 & 0x70) == 0x20){
2108         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2109     }else if ((c2 & 0x70) == 0x70){
2110         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2111     }else if ((c2 & 0x70) >= 0x50){
2112         set_code_score(ptr, SCORE_L2);
2113     }
2114 }
2115
2116 void status_disable(struct input_code *ptr)
2117 {
2118     ptr->stat = -1;
2119     ptr->buf[0] = -1;
2120     code_score(ptr);
2121     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2122 }
2123
2124 void status_push_ch(struct input_code *ptr, nkf_char c)
2125 {
2126     ptr->buf[ptr->index++] = c;
2127 }
2128
2129 void status_clear(struct input_code *ptr)
2130 {
2131     ptr->stat = 0;
2132     ptr->index = 0;
2133 }
2134
2135 void status_reset(struct input_code *ptr)
2136 {
2137     status_clear(ptr);
2138     ptr->score = SCORE_INIT;
2139 }
2140
2141 void status_reinit(struct input_code *ptr)
2142 {
2143     status_reset(ptr);
2144     ptr->_file_stat = 0;
2145 }
2146
2147 void status_check(struct input_code *ptr, nkf_char c)
2148 {
2149     if (c <= DEL && estab_f){
2150         status_reset(ptr);
2151     }
2152 }
2153
2154 void s_status(struct input_code *ptr, nkf_char c)
2155 {
2156     switch(ptr->stat){
2157       case -1:
2158           status_check(ptr, c);
2159           break;
2160       case 0:
2161           if (c <= DEL){
2162               break;
2163 #ifdef NUMCHAR_OPTION
2164           }else if (is_unicode_capsule(c)){
2165               break;
2166 #endif
2167           }else if (0xa1 <= c && c <= 0xdf){
2168               status_push_ch(ptr, SSO);
2169               status_push_ch(ptr, c);
2170               code_score(ptr);
2171               status_clear(ptr);
2172           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2173               ptr->stat = 1;
2174               status_push_ch(ptr, c);
2175           }else if (0xed <= c && c <= 0xee){
2176               ptr->stat = 3;
2177               status_push_ch(ptr, c);
2178 #ifdef SHIFTJIS_CP932
2179           }else if (is_ibmext_in_sjis(c)){
2180               ptr->stat = 2;
2181               status_push_ch(ptr, c);
2182 #endif /* SHIFTJIS_CP932 */
2183 #ifdef X0212_ENABLE
2184           }else if (0xf0 <= c && c <= 0xfc){
2185               ptr->stat = 1;
2186               status_push_ch(ptr, c);
2187 #endif /* X0212_ENABLE */
2188           }else{
2189               status_disable(ptr);
2190           }
2191           break;
2192       case 1:
2193           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2194               status_push_ch(ptr, c);
2195               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2196               code_score(ptr);
2197               status_clear(ptr);
2198           }else{
2199               status_disable(ptr);
2200           }
2201           break;
2202       case 2:
2203 #ifdef SHIFTJIS_CP932
2204         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2205             status_push_ch(ptr, c);
2206             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2207                 set_code_score(ptr, SCORE_CP932);
2208                 status_clear(ptr);
2209                 break;
2210             }
2211         }
2212 #endif /* SHIFTJIS_CP932 */
2213         status_disable(ptr);
2214           break;
2215       case 3:
2216           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2217               status_push_ch(ptr, c);
2218               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2219             set_code_score(ptr, SCORE_CP932);
2220             status_clear(ptr);
2221           }else{
2222               status_disable(ptr);
2223           }
2224           break;
2225     }
2226 }
2227
2228 void e_status(struct input_code *ptr, nkf_char c)
2229 {
2230     switch (ptr->stat){
2231       case -1:
2232           status_check(ptr, c);
2233           break;
2234       case 0:
2235           if (c <= DEL){
2236               break;
2237 #ifdef NUMCHAR_OPTION
2238           }else if (is_unicode_capsule(c)){
2239               break;
2240 #endif
2241           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2242               ptr->stat = 1;
2243               status_push_ch(ptr, c);
2244 #ifdef X0212_ENABLE
2245           }else if (0x8f == c){
2246               ptr->stat = 2;
2247               status_push_ch(ptr, c);
2248 #endif /* X0212_ENABLE */
2249           }else{
2250               status_disable(ptr);
2251           }
2252           break;
2253       case 1:
2254           if (0xa1 <= c && c <= 0xfe){
2255               status_push_ch(ptr, c);
2256               code_score(ptr);
2257               status_clear(ptr);
2258           }else{
2259               status_disable(ptr);
2260           }
2261           break;
2262 #ifdef X0212_ENABLE
2263       case 2:
2264           if (0xa1 <= c && c <= 0xfe){
2265               ptr->stat = 1;
2266               status_push_ch(ptr, c);
2267           }else{
2268               status_disable(ptr);
2269           }
2270 #endif /* X0212_ENABLE */
2271     }
2272 }
2273
#ifdef UTF8_INPUT_ENABLE
/*
 * Status function driven one input value at a time through ptr->stat;
 * judging by the name and the lead-byte ranges it tracks the UTF-8
 * candidate.  While a sequence is open, stat holds the number of
 * 0x80-0xbf bytes expected after the lead byte.
 *
 *   stat -1    disabled; status_check() may reset it
 *   stat  0    idle; 0xc0-0xdf -> 1, 0xe0-0xef -> 2, 0xf0-0xf4 -> 3
 *   stat 1, 2  collect following bytes; once complete the sequence goes
 *              through w2e_conv() and is scored, except for EF BB BF
 *              which is not scored
 *   stat  3    collect following bytes; the finished sequence is dropped
 *              without scoring
 *
 * A value that does not fit the current state disables the candidate.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    const int is_following = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
      case -1:
        status_check(ptr, c);
        break;

      case 0:
        if (c <= DEL){
            break;
        }
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c)){
            break;
        }
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
        }else{
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        break;

      case 1:
      case 2:
        if (!is_following){
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index > ptr->stat){
            /* Must be tested before w2e_conv() overwrites buf[0]/buf[1]. */
            int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                          && ptr->buf[2] == 0xbf);
            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!is_bom){
                code_score(ptr);
            }
            status_clear(ptr);
        }
        break;

      case 3:
        if (!is_following){
            status_disable(ptr);
            break;
        }
        if (ptr->index < ptr->stat){
            status_push_ch(ptr, c);
        }else{
            status_clear(ptr);
        }
        break;
    }
}
#endif
2333
2334 void code_status(nkf_char c)
2335 {
2336     int action_flag = 1;
2337     struct input_code *result = 0;
2338     struct input_code *p = input_code_list;
2339     while (p->name){
2340         if (!p->status_func) {
2341             ++p;
2342             continue;
2343         }
2344         if (!p->status_func)
2345             continue;
2346         (p->status_func)(p, c);
2347         if (p->stat > 0){
2348             action_flag = 0;
2349         }else if(p->stat == 0){
2350             if (result){
2351                 action_flag = 0;
2352             }else{
2353                 result = p;
2354             }
2355         }
2356         ++p;
2357     }
2358
2359     if (action_flag){
2360         if (result && !estab_f){
2361             set_iconv(TRUE, result->iconv_func);
2362         }else if (c <= DEL){
2363             struct input_code *ptr = input_code_list;
2364             while (ptr->name){
2365                 status_reset(ptr);
2366                 ++ptr;
2367             }
2368         }
2369     }
2370 }
2371
2372 #ifndef WIN32DLL
2373 nkf_char std_getc(FILE *f)
2374 {
2375     if (std_gc_ndx){
2376         return std_gc_buf[--std_gc_ndx];
2377     }
2378     return getc(f);
2379 }
2380 #endif /*WIN32DLL*/
2381
2382 nkf_char std_ungetc(nkf_char c, FILE *f)
2383 {
2384     if (std_gc_ndx == STD_GC_BUFSIZE){
2385         return EOF;
2386     }
2387     std_gc_buf[std_gc_ndx++] = c;
2388     return c;
2389 }
2390
2391 #ifndef WIN32DLL
2392 void std_putc(nkf_char c)
2393 {
2394     if(c!=EOF)
2395       putchar(c);
2396 }
2397 #endif /*WIN32DLL*/
2398
2399 #if !defined(PERL_XS) && !defined(WIN32DLL)
2400 nkf_char noconvert(FILE *f)
2401 {
2402     nkf_char    c;
2403
2404     if (nop_f == 2)
2405         module_connection();
2406     while ((c = (*i_getc)(f)) != EOF)
2407       (*o_putc)(c);
2408     (*o_putc)(EOF);
2409     return 1;
2410 }
2411 #endif
2412
2413 void module_connection(void)
2414 {
2415     if (input_encoding) set_input_encoding(input_encoding);
2416     if (!output_encoding) {
2417         output_encoding = nkf_default_encoding();
2418     }
2419     set_output_encoding(output_encoding);
2420     oconv = nkf_enc_to_oconv(output_encoding);
2421     o_putc = std_putc;
2422
2423     /* replace continucation module, from output side */
2424
2425     /* output redicrection */
2426 #ifdef CHECK_OPTION
2427     if (noout_f || guess_f){
2428         o_putc = no_putc;
2429     }
2430 #endif
2431     if (mimeout_f) {
2432         o_mputc = o_putc;
2433         o_putc = mime_putc;
2434         if (mimeout_f == TRUE) {
2435             o_base64conv = oconv; oconv = base64_conv;
2436         }
2437         /* base64_count = 0; */
2438     }
2439
2440     if (nlmode_f || guess_f) {
2441         o_nlconv = oconv; oconv = nl_conv;
2442     }
2443     if (rot_f) {
2444         o_rot_conv = oconv; oconv = rot_conv;
2445     }
2446     if (iso2022jp_f) {
2447         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2448     }
2449     if (hira_f) {
2450         o_hira_conv = oconv; oconv = hira_conv;
2451     }
2452     if (fold_f) {
2453         o_fconv = oconv; oconv = fold_conv;
2454         f_line = 0;
2455     }
2456     if (alpha_f || x0201_f) {
2457         o_zconv = oconv; oconv = z_conv;
2458     }
2459
2460     i_getc = std_getc;
2461     i_ungetc = std_ungetc;
2462     /* input redicrection */
2463 #ifdef INPUT_OPTION
2464     if (cap_f){
2465         i_cgetc = i_getc; i_getc = cap_getc;
2466         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2467     }
2468     if (url_f){
2469         i_ugetc = i_getc; i_getc = url_getc;
2470         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2471     }
2472 #endif
2473 #ifdef NUMCHAR_OPTION
2474     if (numchar_f){
2475         i_ngetc = i_getc; i_getc = numchar_getc;
2476         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2477     }
2478 #endif
2479 #ifdef UNICODE_NORMALIZATION
2480     if (nfc_f){
2481         i_nfc_getc = i_getc; i_getc = nfc_getc;
2482         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2483     }
2484 #endif
2485     if (mime_f && mimebuf_f==FIXED_MIME) {
2486         i_mgetc = i_getc; i_getc = mime_getc;
2487         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2488     }
2489     if (broken_f & 1) {
2490         i_bgetc = i_getc; i_getc = broken_getc;
2491         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2492     }
2493     if (input_encoding) {
2494         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
2495     } else {
2496         set_iconv(FALSE, e_iconv);
2497     }
2498
2499     {
2500         struct input_code *p = input_code_list;
2501         while (p->name){
2502             status_reinit(p++);
2503         }
2504     }
2505 }
2506
2507 /*
2508  * Check and Ignore BOM
2509  */
2510 void check_bom(FILE *f)
2511 {
2512     int c2;
2513     switch(c2 = (*i_getc)(f)){
2514     case 0x00:
2515         if((c2 = (*i_getc)(f)) == 0x00){
2516             if((c2 = (*i_getc)(f)) == 0xFE){
2517                 if((c2 = (*i_getc)(f)) == 0xFF){
2518                     if(!input_encoding){
2519                         set_iconv(TRUE, w_iconv32);
2520                     }
2521                     if (iconv == w_iconv32) {
2522                         input_endian = ENDIAN_BIG;
2523                         return;
2524                     }
2525                     (*i_ungetc)(0xFF,f);
2526                 }else (*i_ungetc)(c2,f);
2527                 (*i_ungetc)(0xFE,f);
2528             }else if(c2 == 0xFF){
2529                 if((c2 = (*i_getc)(f)) == 0xFE){
2530                     if(!input_encoding){
2531                         set_iconv(TRUE, w_iconv32);
2532                     }
2533                     if (iconv == w_iconv32) {
2534                         input_endian = ENDIAN_2143;
2535                         return;
2536                     }
2537                     (*i_ungetc)(0xFF,f);
2538                 }else (*i_ungetc)(c2,f);
2539                 (*i_ungetc)(0xFF,f);
2540             }else (*i_ungetc)(c2,f);
2541             (*i_ungetc)(0x00,f);
2542         }else (*i_ungetc)(c2,f);
2543         (*i_ungetc)(0x00,f);
2544         break;
2545     case 0xEF:
2546         if((c2 = (*i_getc)(f)) == 0xBB){
2547             if((c2 = (*i_getc)(f)) == 0xBF){
2548                 if(!input_encoding){
2549                     set_iconv(TRUE, w_iconv);
2550                 }
2551                 if (iconv == w_iconv) {
2552                     return;
2553                 }
2554                 (*i_ungetc)(0xBF,f);
2555             }else (*i_ungetc)(c2,f);
2556             (*i_ungetc)(0xBB,f);
2557         }else (*i_ungetc)(c2,f);
2558         (*i_ungetc)(0xEF,f);
2559         break;
2560     case 0xFE:
2561         if((c2 = (*i_getc)(f)) == 0xFF){
2562             if((c2 = (*i_getc)(f)) == 0x00){
2563                 if((c2 = (*i_getc)(f)) == 0x00){
2564                     if(!input_encoding){
2565                         set_iconv(TRUE, w_iconv32);
2566                     }
2567                     if (iconv == w_iconv32) {
2568                         input_endian = ENDIAN_3412;
2569                         return;
2570                     }
2571                     (*i_ungetc)(0x00,f);
2572                 }else (*i_ungetc)(c2,f);
2573                 (*i_ungetc)(0x00,f);
2574             }else (*i_ungetc)(c2,f);
2575             if(!input_encoding){
2576                 set_iconv(TRUE, w_iconv16);
2577             }
2578             if (iconv == w_iconv16) {
2579                 input_endian = ENDIAN_BIG;
2580                 return;
2581             }
2582             (*i_ungetc)(0xFF,f);
2583         }else (*i_ungetc)(c2,f);
2584         (*i_ungetc)(0xFE,f);
2585         break;
2586     case 0xFF:
2587         if((c2 = (*i_getc)(f)) == 0xFE){
2588             if((c2 = (*i_getc)(f)) == 0x00){
2589                 if((c2 = (*i_getc)(f)) == 0x00){
2590                     if(!input_encoding){
2591                         set_iconv(TRUE, w_iconv32);
2592                     }
2593                     if (iconv == w_iconv32) {
2594                         input_endian = ENDIAN_LITTLE;
2595                         return;
2596                     }
2597                     (*i_ungetc)(0x00,f);
2598                 }else (*i_ungetc)(c2,f);
2599                 (*i_ungetc)(0x00,f);
2600             }else (*i_ungetc)(c2,f);
2601             if(!input_encoding){
2602                 set_iconv(TRUE, w_iconv16);
2603             }
2604             if (iconv == w_iconv16) {
2605                 input_endian = ENDIAN_LITTLE;
2606                 return;
2607             }
2608             (*i_ungetc)(0xFE,f);
2609         }else (*i_ungetc)(c2,f);
2610         (*i_ungetc)(0xFF,f);
2611         break;
2612     default:
2613         (*i_ungetc)(c2,f);
2614         break;
2615     }
2616 }
2617
2618 /*
2619    Conversion main loop: code detection and dispatch.

   Reads the stream f one unit at a time through (*i_getc)() until EOF.
   input_mode, output_mode and shift_mode are reset first, then
   module_connection() and check_bom(f) are called.

   Local state:
     c2  pending first byte of a two-byte character (0 = none pending),
         or a character-set marker such as JIS_X_0201 / ISO_8859_1
     c1  the byte just read
     c0  extra byte(s) for 3/4-byte sequences and UTF-16 surrogates
     c3  scratch byte

   Control macros used inside the loop body:
     NEXT  'continue'  - nothing to output, read the next byte
     SEND  ';'         - empty statement; control leaves the if/else chain
                         and reaches the switch on input_mode ("send:")
     LAST  'break'     - leave the loop and run the epilogue

   At "send:" the character goes to (*iconv)() while input_mode is ASCII,
   and directly to (*oconv)() for the JIS character sets.

   Returns 1 after the epilogue.  The only other return statement is the
   'return 0' in the CP932 UDC branch (see the note there).
2620  */
2621
2622 nkf_char kanji_convert(FILE *f)
2623 {
2624     nkf_char    c3, c2=0, c1, c0=0;
2625     int is_8bit = FALSE;
2626
         /* is_8bit: an input encoding was given and nkf_enc_asciicompat()
            rejects it; it disables the SI/SO/ESC handling below and enables
            the codename guess in the epilogue */
2627     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
2628         is_8bit = TRUE;
2629     }
2630
2631     input_mode = ASCII;
2632     output_mode = ASCII;
2633     shift_mode = FALSE;
2634
2635 #define NEXT continue      /* no output, get next */
2636 #define SEND ;             /* output c1 and c2, get next */
2637 #define LAST break         /* end of loop, go closing  */
2638
2639     module_connection();
2640     check_bom(f);
2641
2642     while ((c1 = (*i_getc)(f)) != EOF) {
             /* with INPUT_CODE_FIX, code_status() is fed only while no input
                encoding has been fixed; without it, it is fed every byte */
2643 #ifdef INPUT_CODE_FIX
2644         if (!input_encoding)
2645 #endif
2646             code_status(c1);
2647         if (c2) {
2648             /* second byte */
                 /* "8-bit" threshold: above 0x92 for CP5022x input, above DEL otherwise */
2649             if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2650                 /* in case of 8th bit is on */
2651                 if (!estab_f&&!mime_decode_mode) {
2652                     /* in case of not established yet */
2653                     /* It is still ambiguous */
                         /* h_conv() consumes c2/c1 itself, so nothing is sent from here */
2654                     if (h_conv(f, c2, c1)==EOF)
2655                         LAST;
2656                     else
2657                         c2 = 0;
2658                     NEXT;
2659                 } else {
2660                     /* in case of already established */
2661                     if (c1 < AT) {
2662                         /* ignore bogus code and not CP5022x UCD */
2663                         c2 = 0;
2664                         NEXT;
2665                     } else {
2666                         SEND;
2667                     }
2668                 }
2669             } else
2670                 /* second byte, 7 bit code */
2671                 /* it might be kanji shifted */
2672                 if ((c1 == DEL) || (c1 <= SP)) {
2673                     /* ignore bogus first code */
2674                     c2 = 0;
2675                     NEXT;
2676                 } else
2677                     SEND;
2678         } else {
2679             /* first byte */
2680 #ifdef UTF8_INPUT_ENABLE
2681             if (iconv == w_iconv16) {
                     /* UTF-16: read the second byte of the code unit; when the
                        high byte is 0xD8..0xDB, read two more bytes and pack
                        them into c0 as (high << 8) | low.  EOF at any point
                        sets c2 = EOF. */
2682                 if (input_endian == ENDIAN_BIG) {
2683                     c2 = c1;
2684                     if ((c1 = (*i_getc)(f)) != EOF) {
2685                         if (0xD8 <= c2 && c2 <= 0xDB) {
2686                             if ((c0 = (*i_getc)(f)) != EOF) {
2687                                 c0 <<= 8;
2688                                 if ((c3 = (*i_getc)(f)) != EOF) {
2689                                     c0 |= c3;
2690                                 } else c2 = EOF;
2691                             } else c2 = EOF;
2692                         }
2693                     } else c2 = EOF;
2694                 } else {
2695                     if ((c2 = (*i_getc)(f)) != EOF) {
2696                         if (0xD8 <= c2 && c2 <= 0xDB) {
2697                             if ((c3 = (*i_getc)(f)) != EOF) {
2698                                 if ((c0 = (*i_getc)(f)) != EOF) {
2699                                     c0 <<= 8;
2700                                     c0 |= c3;
2701                                 } else c2 = EOF;
2702                             } else c2 = EOF;
2703                         }
2704                     } else c2 = EOF;
2705                 }
2706                 SEND;
2707             } else if(iconv == w_iconv32){
                     /* UTF-32: this local c3 shadows the function-level c3 and
                        keeps the first byte; three more bytes are read and
                        combined into c1 according to input_endian (no default
                        case: c1 is left as read for any other value). */
2708                 int c3 = c1;
2709                 if((c2 = (*i_getc)(f)) != EOF &&
2710                    (c1 = (*i_getc)(f)) != EOF &&
2711                    (c0 = (*i_getc)(f)) != EOF){
2712                     switch(input_endian){
2713                     case ENDIAN_BIG:
2714                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2715                         break;
2716                     case ENDIAN_LITTLE:
2717                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2718                         break;
2719                     case ENDIAN_2143:
2720                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2721                         break;
2722                     case ENDIAN_3412:
2723                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2724                         break;
2725                     }
2726                     c2 = 0;
2727                 }else{
2728                     c2 = EOF;
2729                 }
2730                 SEND;
2731             } else
2732 #endif
2733 #ifdef NUMCHAR_OPTION
2734             if (is_unicode_capsule(c1)){
2735                 SEND;
2736             } else
2737 #endif
2738             if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2739                 /* 8 bit code */
2740                 if (!estab_f && !iso8859_f) {
2741                     /* not established yet */
                         /* keep it as the pending first byte; the next byte decides */
2742                     c2 = c1;
2743                     NEXT;
2744                 } else { /* estab_f==TRUE */
2745                     if (iso8859_f) {
2746                         c2 = ISO_8859_1;
2747                         c1 &= 0x7f;
2748                         SEND;
2749                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2750                         /* SJIS X0201 Case... */
2751                         if (iso2022jp_f && !x0201_f) {
2752                             (*oconv)(GETA1, GETA2);
2753                             NEXT;
2754                         } else {
2755                             c2 = JIS_X_0201;
2756                             c1 &= 0x7f;
2757                             SEND;
2758                         }
2759                     } else if (c1==SSO && iconv != s_iconv) {
2760                         /* EUC X0201 Case */
2761                         c1 = (*i_getc)(f);  /* skip SSO */
2762                         code_status(c1);
2763                         if (SSP<=c1 && c1<0xe0) {
2764                             if (iso2022jp_f && !x0201_f) {
2765                                 (*oconv)(GETA1, GETA2);
2766                                 NEXT;
2767                             } else {
2768                                 c2 = JIS_X_0201;
2769                                 c1 &= 0x7f;
2770                                 SEND;
2771                             }
2772                         } else  { /* bogus code, skip SSO and one byte */
2773                             NEXT;
2774                         }
2775                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2776                                (c1 == 0xFD || c1 == 0xFE)) {
2777                         /* CP10001 */
2778                         c2 = JIS_X_0201;
2779                         c1 &= 0x7f;
2780                         SEND;
2781                     } else {
2782                        /* already established */
                            /* first byte of a multi-byte character: hold it in c2 */
2783                        c2 = c1;
2784                        NEXT;
2785                     }
2786                 }
2787             } else if ((c1 > SP) && (c1 != DEL)) {
2788                 /* in case of Roman characters */
2789                 if (shift_mode) {
2790                     /* output 1 shifted byte */
2791                     if (iso8859_f) {
2792                         c2 = ISO_8859_1;
2793                         SEND;
2794                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2795                       /* output 1 shifted byte */
2796                         if (iso2022jp_f && !x0201_f) {
2797                             (*oconv)(GETA1, GETA2);
2798                             NEXT;
2799                         } else {
2800                             c2 = JIS_X_0201;
2801                             SEND;
2802                         }
2803                     } else {
2804                         /* look like bogus code */
2805                         NEXT;
2806                     }
2807                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2808                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2809                     /* in case of Kanji shifted */
2810                     c2 = c1;
2811                     NEXT;
2812                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2813                     /* Check MIME code */
2814                     if ((c1 = (*i_getc)(f)) == EOF) {
2815                         (*oconv)(0, '=');
2816                         LAST;
2817                     } else if (c1 == '?') {
2818                         /* =? is mime conversion start sequence */
2819                         if(mime_f == STRICT_MIME) {
2820                             /* check in real detail */
2821                             if (mime_begin_strict(f) == EOF)
2822                                 LAST;
2823                             else
2824                                 NEXT;
2825                         } else if (mime_begin(f) == EOF)
2826                             LAST;
2827                         else
2828                             NEXT;
2829                     } else {
                             /* plain '=': output it and push the following byte back */
2830                         (*oconv)(0, '=');
2831                         (*i_ungetc)(c1,f);
2832                         NEXT;
2833                     }
2834                 } else {
2835                     /* normal ASCII code */
2836                     SEND;
2837                 }
2838             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2839                 shift_mode = FALSE;
2840                 NEXT;
2841             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2842                 shift_mode = TRUE;
2843                 NEXT;
2844             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* escape sequence: the following bytes select input_mode */
2845                 if ((c1 = (*i_getc)(f)) == EOF) {
2846                     /*  (*oconv)(0, ESC); don't send bogus code */
2847                     LAST;
2848                 } else if (c1 == '$') {
2849                     if ((c1 = (*i_getc)(f)) == EOF) {
2850                         /*
2851                         (*oconv)(0, ESC); don't send bogus code
2852                         (*oconv)(0, '$'); */
2853                         LAST;
2854                     } else if (c1 == '@'|| c1 == 'B') {
2855                         /* This is kanji introduction */
2856                         input_mode = JIS_X_0208;
2857                         shift_mode = FALSE;
2858                         set_input_codename("ISO-2022-JP");
2859 #ifdef CHECK_OPTION
2860                         debug("ISO-2022-JP");
2861 #endif
2862                         NEXT;
2863                     } else if (c1 == '(') {
2864                         if ((c1 = (*i_getc)(f)) == EOF) {
2865                             /* don't send bogus code
2866                             (*oconv)(0, ESC);
2867                             (*oconv)(0, '$');
2868                             (*oconv)(0, '(');
2869                                 */
2870                             LAST;
2871                         } else if (c1 == '@'|| c1 == 'B') {
2872                             /* This is kanji introduction */
2873                             input_mode = JIS_X_0208;
2874                             shift_mode = FALSE;
2875                             NEXT;
2876 #ifdef X0212_ENABLE
2877                         } else if (c1 == 'D'){
2878                             input_mode = JIS_X_0212;
2879                             shift_mode = FALSE;
2880                             NEXT;
2881 #endif /* X0212_ENABLE */
2882                         } else if (c1 == 0x4F){
2883                             input_mode = JIS_X_0213_1;
2884                             shift_mode = FALSE;
2885                             NEXT;
2886                         } else if (c1 == 0x50){
2887                             input_mode = JIS_X_0213_2;
2888                             shift_mode = FALSE;
2889                             NEXT;
2890                         } else {
2891                             /* could be some special code */
                                 /* unknown ESC $ ( x: pass all four bytes through */
2892                             (*oconv)(0, ESC);
2893                             (*oconv)(0, '$');
2894                             (*oconv)(0, '(');
2895                             (*oconv)(0, c1);
2896                             NEXT;
2897                         }
2898                     } else if (broken_f&0x2) {
2899                         /* broken_f&0x2: accept any other ESC-$-x as kanji introduction */
2900                         input_mode = JIS_X_0208;
2901                         shift_mode = FALSE;
2902                         NEXT;
2903                     } else {
2904                         (*oconv)(0, ESC);
2905                         (*oconv)(0, '$');
2906                         (*oconv)(0, c1);
2907                         NEXT;
2908                     }
2909                 } else if (c1 == '(') {
2910                     if ((c1 = (*i_getc)(f)) == EOF) {
2911                         /* don't send bogus code
2912                         (*oconv)(0, ESC);
2913                         (*oconv)(0, '('); */
2914                         LAST;
2915                     } else {
2916                         if (c1 == 'I') {
2917                             /* This is X0201 kana introduction */
2918                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
2919                             NEXT;
2920                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2921                             /* ESC ( B / J / H: back to single-byte mode (input_mode = ASCII) */
2922                             input_mode = ASCII; shift_mode = FALSE;
2923                             NEXT;
2924                         } else if (broken_f&0x2) {
                                 /* broken_f&0x2: treat any other ESC-(-x the same way */
2925                             input_mode = ASCII; shift_mode = FALSE;
2926                             NEXT;
2927                         } else {
2928                             (*oconv)(0, ESC);
2929                             (*oconv)(0, '(');
2930                             /* maintain various input_mode here */
2931                             SEND;
2932                         }
2933                     }
2934                } else if ( c1 == 'N' || c1 == 'n'){
2935                    /* SS2 */
2936                    c3 = (*i_getc)(f);  /* skip SS2 */
2937                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2938                        c1 = c3;
2939                        c2 = JIS_X_0201;
2940                        SEND;
2941                    }else{
                            /* not a kana byte: push it back (even when it is EOF)
                               and emit ESC followed by the 'N'/'n' still in c1 */
2942                        (*i_ungetc)(c3, f);
2943                        /* lonely ESC  */
2944                        (*oconv)(0, ESC);
2945                        SEND;
2946                    }
2947                 } else {
2948                     /* lonely ESC  */
2949                     (*oconv)(0, ESC);
2950                     SEND;
2951                 }
2952             } else if (c1 == ESC && iconv == s_iconv) {
2953                 /* ESC in Shift_JIS */
2954                 if ((c1 = (*i_getc)(f)) == EOF) {
2955                     /*  (*oconv)(0, ESC); don't send bogus code */
2956                     LAST;
2957                 } else if (c1 == '$') {
2958                     /* J-PHONE emoji */
2959                     if ((c1 = (*i_getc)(f)) == EOF) {
2960                         /*
2961                            (*oconv)(0, ESC); don't send bogus code
2962                            (*oconv)(0, '$'); */
2963                         LAST;
2964                     } else {
2965                         if (('E' <= c1 && c1 <= 'G') ||
2966                             ('O' <= c1 && c1 <= 'Q')) {
2967                             /*
2968                                NUM : 0 1 2 3 4 5
2969                                BYTE: G E F O P Q
2970                                C%7 : 1 6 0 2 3 4
2971                                C%7 : 0 1 2 3 4 5 6
2972                                NUM : 2 0 3 4 5 X 1
2973                              */
2974                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                                 /* c0 becomes the offset added to each following byte:
                                    (table value << 8) - SP + 0xE000 + CLASS_UNICODE */
2975                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2976                             while ((c1 = (*i_getc)(f)) != EOF) {
2977                                 if (SP <= c1 && c1 <= 'z') {
2978                                     (*oconv)(0, c1 + c0);
2979                                 } else break; /* c1 == SO */
2980                             }
2981                         }
2982                     }
2983                     if (c1 == EOF) LAST;
2984                     NEXT;
2985                 } else {
2986                     /* lonely ESC  */
2987                     (*oconv)(0, ESC);
2988                     SEND;
2989                 }
2990             } else if (c1 == LF || c1 == CR) {
                     /* when neither condition below holds, control simply falls
                        out of the if/else chain and the newline is sent */
2991                 if (broken_f&4) {
2992                     input_mode = ASCII; set_iconv(FALSE, 0);
2993                     SEND;
2994                 } else if (mime_decode_f && !mime_decode_mode){
                         /* a newline followed by SP is dropped: the SP is pushed
                            back and the loop continues without sending */
2995                     if (c1 == LF) {
2996                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2997                             i_ungetc(SP,f);
2998                             continue;
2999                         } else {
3000                             i_ungetc(c1,f);
3001                         }
3002                         c1 = LF;
3003                         SEND;
3004                     } else  { /* if (c1 == CR)*/
3005                         if ((c1=(*i_getc)(f))!=EOF) {
3006                             if (c1==SP) {
3007                                 i_ungetc(SP,f);
3008                                 continue;
3009                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3010                                 i_ungetc(SP,f);
3011                                 continue;
3012                             } else {
3013                                 i_ungetc(c1,f);
3014                             }
                                 /* NOTE(review): LF is pushed back here even when the
                                    byte after CR was not LF - confirm this is intended */
3015                             i_ungetc(LF,f);
3016                         } else {
3017                             i_ungetc(c1,f);
3018                         }
3019                         c1 = CR;
3020                         SEND;
3021                     }
3022                 }
3023             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3024                 /* CP5022x */
3025                 c2 = c1;
3026                 NEXT;
3027             } else
3028                 SEND;
3029         }
3030         /* send: */
             /* every SEND above arrives here with the character in c2/c1 (and c0) */
3031         switch(input_mode){
3032         case ASCII:
                 /* a negative return from (*iconv)() asks for more bytes:
                    -2 = two more (packed into c0 as (b3 << 8) | b4), -1 = one more;
                    the call is then repeated with the completed sequence */
3033             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3034             case -2:
3035                 /* 4 bytes UTF-8 */
3036                 if ((c0 = (*i_getc)(f)) != EOF) {
3037                     code_status(c0);
3038                     c0 <<= 8;
3039                     if ((c3 = (*i_getc)(f)) != EOF) {
3040                         code_status(c3);
3041                         (*iconv)(c2, c1, c0|c3);
3042                     }
3043                 }
3044                 break;
3045             case -1:
3046                 /* 3 bytes EUC or UTF-8 */
3047                 if ((c0 = (*i_getc)(f)) != EOF) {
3048                     code_status(c0);
3049                     (*iconv)(c2, c1, c0);
3050                 }
3051                 break;
3052             }
3053             break;
3054         case JIS_X_0208:
3055         case JIS_X_0213_1:
3056             if (ms_ucs_map_f &&
3057                 0x7F <= c2 && c2 <= 0x92 &&
3058                 0x21 <= c1 && c1 <= 0x7E) {
3059                 /* CP932 UDC */
                     /* NOTE(review): the enclosing condition already requires
                        c1 <= 0x7E, so the next test can never be true */
3060                 if(c1 == 0x7F) return 0;
3061                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3062                 c2 = 0;
3063             }
3064             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3065             break;
3066 #ifdef X0212_ENABLE
3067         case JIS_X_0212:
3068             (*oconv)(PREFIX_EUCG3 | c2, c1);
3069             break;
3070 #endif /* X0212_ENABLE */
3071         case JIS_X_0213_2:
3072             (*oconv)(PREFIX_EUCG3 | c2, c1);
3073             break;
3074         default:
3075             (*oconv)(input_mode, c1);  /* other special case */
3076         }
3077
             /* character consumed: clear the pending bytes for the next one */
3078         c2 = 0;
3079         c0 = 0;
3080         continue;
3081         /* goto next_word */
3082     }
3083
3084     /* epilogue */
         /* signal end of input to the input converter */
3085     (*iconv)(EOF, 0, 0);
3086     if (!input_codename)
3087     {
3088         if (is_8bit) {
                 /* no codename recorded yet: report the input_code_list entry
                    with the lowest score (the first one wins ties) */
3089             struct input_code *p = input_code_list;
3090             struct input_code *result = p;
3091             while (p->name){
3092                 if (p->score < result->score) result = p;
3093                 ++p;
3094             }
3095             set_input_codename(result->name);
3096 #ifdef CHECK_OPTION
3097             debug(result->name);
3098 #endif
3099         }
3100     }
3101     return 1;
3102 }
3103
3104 nkf_char
3105 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3106 {
3107     nkf_char ret, c3, c0;
3108     int hold_index;
3109
3110
3111     /** it must NOT be in the kanji shifte sequence      */
3112     /** it must NOT be written in JIS7                   */
3113     /** and it must be after 2 byte 8bit code            */
3114
3115     hold_count = 0;
3116     push_hold_buf(c2);
3117     push_hold_buf(c1);
3118
3119     while ((c1 = (*i_getc)(f)) != EOF) {
3120         if (c1 == ESC){
3121             (*i_ungetc)(c1,f);
3122             break;
3123         }
3124         code_status(c1);
3125         if (push_hold_buf(c1) == EOF || estab_f){
3126             break;
3127         }
3128     }
3129
3130     if (!estab_f){
3131         struct input_code *p = input_code_list;
3132         struct input_code *result = p;
3133         if (c1 == EOF){
3134             code_status(c1);
3135         }
3136         while (p->name){
3137             if (p->status_func && p->score < result->score){
3138                 result = p;
3139             }
3140             ++p;
3141         }
3142         set_iconv(TRUE, result->iconv_func);
3143     }
3144
3145
3146     /** now,
3147      ** 1) EOF is detected, or
3148      ** 2) Code is established, or
3149      ** 3) Buffer is FULL (but last word is pushed)
3150      **
3151      ** in 1) and 3) cases, we continue to use
3152      ** Kanji codes by oconv and leave estab_f unchanged.
3153      **/
3154
3155     ret = c1;
3156     hold_index = 0;
3157     while (hold_index < hold_count){
3158         c2 = hold_buf[hold_index++];
3159         if (c2 <= DEL
3160 #ifdef NUMCHAR_OPTION
3161             || is_unicode_capsule(c2)
3162 #endif
3163             ){
3164             (*iconv)(0, c2, 0);
3165             continue;
3166         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3167             (*iconv)(JIS_X_0201, c2, 0);
3168             continue;
3169         }
3170         if (hold_index < hold_count){
3171             c1 = hold_buf[hold_index++];
3172         }else{
3173             c1 = (*i_getc)(f);
3174             if (c1 == EOF){
3175                 c3 = EOF;
3176                 break;
3177             }
3178             code_status(c1);
3179         }
3180         c0 = 0;
3181         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3182         case -2:
3183             /* 4 bytes UTF-8 */
3184             if (hold_index < hold_count){
3185                 c0 = hold_buf[hold_index++];
3186             } else if ((c0 = (*i_getc)(f)) == EOF) {
3187                 ret = EOF;
3188                 break;
3189             } else {
3190                 code_status(c0);
3191                 c0 <<= 8;
3192                 if (hold_index < hold_count){
3193                     c3 = hold_buf[hold_index++];
3194                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3195                     c0 = ret = EOF;
3196                     break;
3197                 } else {
3198                     code_status(c3);
3199                     (*iconv)(c2, c1, c0|c3);
3200                 }
3201             }
3202             break;
3203         case -1:
3204             /* 3 bytes EUC or UTF-8 */
3205             if (hold_index < hold_count){
3206                 c0 = hold_buf[hold_index++];
3207             } else if ((c0 = (*i_getc)(f)) == EOF) {
3208                 ret = EOF;
3209                 break;
3210             } else {
3211                 code_status(c0);
3212             }
3213             (*iconv)(c2, c1, c0);
3214             break;
3215         }
3216         if (c0 == EOF) break;
3217     }
3218     return ret;
3219 }
3220
3221 nkf_char push_hold_buf(nkf_char c2)
3222 {
3223     if (hold_count >= HOLD_SIZE*2)
3224         return (EOF);
3225     hold_buf[hold_count++] = (unsigned char)c2;
3226     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3227 }
3228
3229 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3230 {
3231 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3232     nkf_char val;
3233 #endif
3234     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3235 #ifdef SHIFTJIS_CP932
3236     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3237         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3238         if (val){
3239             c2 = val >> 8;
3240             c1 = val & 0xff;
3241         }
3242     }
3243     if (cp932inv_f
3244         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3245         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3246         if (c){
3247             c2 = c >> 8;
3248             c1 = c & 0xff;
3249         }
3250     }
3251 #endif /* SHIFTJIS_CP932 */
3252 #ifdef X0212_ENABLE
3253     if (!x0213_f && is_ibmext_in_sjis(c2)){
3254         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3255         if (val){
3256             if (val > 0x7FFF){
3257                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3258                 c1 = val & 0xff;
3259             }else{
3260                 c2 = val >> 8;
3261                 c1 = val & 0xff;
3262             }
3263             if (p2) *p2 = c2;
3264             if (p1) *p1 = c1;
3265             return 0;
3266         }
3267     }
3268 #endif
3269     if(c2 >= 0x80){
3270         if(x0213_f && c2 >= 0xF0){
3271             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3272                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3273             }else{ /* 78<=k<=94 */
3274                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3275                 if (0x9E < c1) c2++;
3276             }
3277         }else{
3278 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
3279 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
3280             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3281             if (0x9E < c1) c2++;
3282         }
3283         if (c1 < 0x9F)
3284             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3285         else {
3286             c1 = c1 - 0x7E;
3287         }
3288     }
3289
3290 #ifdef X0212_ENABLE
3291     c2 = x0212_unshift(c2);
3292 #endif
3293     if (p2) *p2 = c2;
3294     if (p1) *p1 = c1;
3295     return 0;
3296 }
3297
3298 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3299 {
3300     if (c2 == JIS_X_0201) {
3301         c1 &= 0x7f;
3302     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3303         /* NOP */
3304     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3305         /* CP932 UDC */
3306         if(c1 == 0x7F) return 0;
3307         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3308         c2 = 0;
3309     } else {
3310         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3311         if (ret) return ret;
3312     }
3313     (*oconv)(c2, c1);
3314     return 0;
3315 }
3316
3317 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3318 {
3319     if (c2 == JIS_X_0201) {
3320         c1 &= 0x7f;
3321 #ifdef X0212_ENABLE
3322     }else if (c2 == 0x8f){
3323         if (c0 == 0){
3324             return -1;
3325         }
3326         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3327             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3328             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3329             c2 = 0;
3330         } else {
3331             c2 = (c2 << 8) | (c1 & 0x7f);
3332             c1 = c0 & 0x7f;
3333 #ifdef SHIFTJIS_CP932
3334             if (cp51932_f){
3335                 nkf_char s2, s1;
3336                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3337                     s2e_conv(s2, s1, &c2, &c1);
3338                     if (c2 < 0x100){
3339                         c1 &= 0x7f;
3340                         c2 &= 0x7f;
3341                     }
3342                 }
3343             }
3344 #endif /* SHIFTJIS_CP932 */
3345         }
3346 #endif /* X0212_ENABLE */
3347     } else if (c2 == SSO){
3348         c2 = JIS_X_0201;
3349         c1 &= 0x7f;
3350     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3351         /* NOP */
3352     } else {
3353         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3354             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3355             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3356             c2 = 0;
3357         } else {
3358             c1 &= 0x7f;
3359             c2 &= 0x7f;
3360 #ifdef SHIFTJIS_CP932
3361             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3362                 nkf_char s2, s1;
3363                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3364                     s2e_conv(s2, s1, &c2, &c1);
3365                     if (c2 < 0x100){
3366                         c1 &= 0x7f;
3367                         c2 &= 0x7f;
3368                     }
3369                 }
3370             }
3371 #endif /* SHIFTJIS_CP932 */
3372         }
3373     }
3374     (*oconv)(c2, c1);
3375     return 0;
3376 }
3377
3378 #ifdef UTF8_INPUT_ENABLE
3379 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3380 {
3381     nkf_char ret = 0;
3382
3383     if (!c1){
3384         *p2 = 0;
3385         *p1 = c2;
3386     }else if (0xc0 <= c2 && c2 <= 0xef) {
3387         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3388 #ifdef NUMCHAR_OPTION
3389         if (ret > 0){
3390             if (p2) *p2 = 0;
3391             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3392             ret = 0;
3393         }
3394 #endif
3395     }
3396     return ret;
3397 }
3398
3399 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3400 {
3401     nkf_char ret = 0;
3402     static const char w_iconv_utf8_1st_byte[] =
3403     { /* 0xC0 - 0xFF */
3404         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3405         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3406         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3407         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3408
3409     if (c2 < 0 || 0xff < c2) {
3410     }else if (c2 == 0) { /* 0 : 1 byte*/
3411         c0 = 0;
3412     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3413         return 0;
3414     } else{
3415         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3416         case 21:
3417             if (c1 < 0x80 || 0xBF < c1) return 0;
3418             break;
3419         case 30:
3420             if (c0 == 0) return -1;
3421             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3422                 return 0;
3423             break;
3424         case 31:
3425         case 33:
3426             if (c0 == 0) return -1;
3427             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3428                 return 0;
3429             break;
3430         case 32:
3431             if (c0 == 0) return -1;
3432             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3433                 return 0;
3434             break;
3435         case 40:
3436             if (c0 == 0) return -2;
3437             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3438                 return 0;
3439             break;
3440         case 41:
3441             if (c0 == 0) return -2;
3442             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3443                 return 0;
3444             break;
3445         case 42:
3446             if (c0 == 0) return -2;
3447             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3448                 return 0;
3449             break;
3450         default:
3451             return 0;
3452             break;
3453         }
3454     }
3455     if (c2 == 0 || c2 == EOF){
3456     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3457         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3458         c2 = 0;
3459     } else {
3460         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3461     }
3462     if (ret == 0){
3463         (*oconv)(c2, c1);
3464     }
3465     return ret;
3466 }
3467 #endif
3468
3469 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3470 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3471 {
3472     val &= VALUE_MASK;
3473     if (val < 0x80){
3474         *p2 = val;
3475         *p1 = 0;
3476         *p0 = 0;
3477     }else if (val < 0x800){
3478         *p2 = 0xc0 | (val >> 6);
3479         *p1 = 0x80 | (val & 0x3f);
3480         *p0 = 0;
3481     } else if (val <= NKF_INT32_C(0xFFFF)) {
3482         *p2 = 0xe0 | (val >> 12);
3483         *p1 = 0x80 | ((val >> 6) & 0x3f);
3484         *p0 = 0x80 | (val        & 0x3f);
3485     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3486         *p2 = 0xe0 |  (val >> 16);
3487         *p1 = 0x80 | ((val >> 12) & 0x3f);
3488         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3489     } else {
3490         *p2 = 0;
3491         *p1 = 0;
3492         *p0 = 0;
3493     }
3494 }
3495 #endif
3496
3497 #ifdef UTF8_INPUT_ENABLE
3498 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3499 {
3500     nkf_char val;
3501     if (c2 >= 0xf8) {
3502         val = -1;
3503     } else if (c2 >= 0xf0){
3504         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3505         val = (c2 & 0x0f) << 18;
3506         val |= (c1 & 0x3f) << 12;
3507         val |= (c0 & 0x3f00) >> 2;
3508         val |= (c0 & 0x3f);
3509     }else if (c2 >= 0xe0){
3510         val = (c2 & 0x0f) << 12;
3511         val |= (c1 & 0x3f) << 6;
3512         val |= (c0 & 0x3f);
3513     }else if (c2 >= 0xc0){
3514         val = (c2 & 0x1f) << 6;
3515         val |= (c1 & 0x3f);
3516     }else{
3517         val = c2;
3518     }
3519     return val;
3520 }
3521
3522 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3523 {
3524     nkf_char c2, c1, c0;
3525     nkf_char ret = 0;
3526     val &= VALUE_MASK;
3527     if (val < 0x80){
3528         *p2 = 0;
3529         *p1 = val;
3530     }else{
3531         w16w_conv(val, &c2, &c1, &c0);
3532         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3533 #ifdef NUMCHAR_OPTION
3534         if (ret > 0){
3535             *p2 = 0;
3536             *p1 = CLASS_UNICODE | val;
3537             ret = 0;
3538         }
3539 #endif
3540     }
3541     return ret;
3542 }
3543 #endif
3544
3545 #ifdef UTF8_INPUT_ENABLE
3546 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3547 {
3548     nkf_char ret = 0;
3549     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3550         (*oconv)(c2, c1);
3551         return 0;
3552     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3553         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3554             return -2;
3555         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3556         c2 = 0;
3557     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3558         /*
3559            return 2;
3560         */
3561         return 1;
3562     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3563     if (ret) return ret;
3564     (*oconv)(c2, c1);
3565     return 0;
3566 }
3567
3568 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3569 {
3570     int ret = 0;
3571
3572     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3573     } else if (is_unicode_bmp(c1)) {
3574         ret = w16e_conv(c1, &c2, &c1);
3575     } else {
3576         c2 = 0;
3577         c1 =  CLASS_UNICODE | c1;
3578     }
3579     if (ret) return ret;
3580     (*oconv)(c2, c1);
3581     return 0;
3582 }
3583
3584 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3585 {
3586     const unsigned short *const *pp;
3587     const unsigned short *const *const *ppp;
3588     static const char no_best_fit_chars_table_C2[] =
3589     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3590         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3591         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3592         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3593     static const char no_best_fit_chars_table_C2_ms[] =
3594     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3595         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3596         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3597         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3598     static const char no_best_fit_chars_table_932_C2[] =
3599     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3600         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3601         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3602         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3603     static const char no_best_fit_chars_table_932_C3[] =
3604     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3605         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3606         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3607         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3608     nkf_char ret = 0;
3609
3610     if(c2 < 0x80){
3611         *p2 = 0;
3612         *p1 = c2;
3613     }else if(c2 < 0xe0){
3614         if(no_best_fit_chars_f){
3615             if(ms_ucs_map_f == UCS_MAP_CP932){
3616                 switch(c2){
3617                 case 0xC2:
3618                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3619                     break;
3620                 case 0xC3:
3621                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3622                     break;
3623                 }
3624             }else if(!cp932inv_f){
3625                 switch(c2){
3626                 case 0xC2:
3627                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3628                     break;
3629                 case 0xC3:
3630                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3631                     break;
3632                 }
3633             }else if(ms_ucs_map_f == UCS_MAP_MS){
3634                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3635             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3636                 switch(c2){
3637                 case 0xC2:
3638                     switch(c1){
3639                     case 0xA2:
3640                     case 0xA3:
3641                     case 0xA5:
3642                     case 0xA6:
3643                     case 0xAC:
3644                     case 0xAF:
3645                     case 0xB8:
3646                         return 1;
3647                     }
3648                     break;
3649                 }
3650             }
3651         }
3652         pp =
3653             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3654             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3655             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3656             utf8_to_euc_2bytes;
3657         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3658     }else if(c0 < 0xF0){
3659         if(no_best_fit_chars_f){
3660             if(ms_ucs_map_f == UCS_MAP_CP932){
3661                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3662             }else if(ms_ucs_map_f == UCS_MAP_MS){
3663                 switch(c2){
3664                 case 0xE2:
3665                     switch(c1){
3666                     case 0x80:
3667                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3668                         break;
3669                     case 0x88:
3670                         if(c0 == 0x92) return 1;
3671                         break;
3672                     }
3673                     break;
3674                 case 0xE3:
3675                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3676                     break;
3677                 }
3678             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3679                 switch(c2){
3680                 case 0xE3:
3681                     switch(c1){
3682                     case 0x82:
3683                             if(c0 == 0x94) return 1;
3684                         break;
3685                     case 0x83:
3686                             if(c0 == 0xBB) return 1;
3687                         break;
3688                     }
3689                     break;
3690                 }
3691             }else{
3692                 switch(c2){
3693                 case 0xE2:
3694                     switch(c1){
3695                     case 0x80:
3696                         if(c0 == 0x95) return 1;
3697                         break;
3698                     case 0x88:
3699                         if(c0 == 0xA5) return 1;
3700                         break;
3701                     }
3702                     break;
3703                 case 0xEF:
3704                     switch(c1){
3705                     case 0xBC:
3706                         if(c0 == 0x8D) return 1;
3707                         break;
3708                     case 0xBD:
3709                         if(c0 == 0x9E && !cp932inv_f) return 1;
3710                         break;
3711                     case 0xBF:
3712                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3713                         break;
3714                     }
3715                     break;
3716                 }
3717             }
3718         }
3719         ppp =
3720             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3721             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3722             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3723             utf8_to_euc_3bytes;
3724         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3725     }else return -1;
3726 #ifdef SHIFTJIS_CP932
3727     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3728         nkf_char s2, s1;
3729         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3730             s2e_conv(s2, s1, p2, p1);
3731         }else{
3732             ret = 1;
3733         }
3734     }
3735 #endif
3736     return ret;
3737 }
3738
3739 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3740 {
3741     nkf_char c2;
3742     const unsigned short *p;
3743     unsigned short val;
3744
3745     if (pp == 0) return 1;
3746
3747     c1 -= 0x80;
3748     if (c1 < 0 || psize <= c1) return 1;
3749     p = pp[c1];
3750     if (p == 0)  return 1;
3751
3752     c0 -= 0x80;
3753     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3754     val = p[c0];
3755     if (val == 0) return 1;
3756     if (no_cp932ext_f && (
3757         (val>>8) == 0x2D || /* NEC special characters */
3758         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3759         )) return 1;
3760
3761     c2 = val >> 8;
3762    if (val > 0x7FFF){
3763         c2 &= 0x7f;
3764         c2 |= PREFIX_EUCG3;
3765     }
3766     if (c2 == SO) c2 = JIS_X_0201;
3767     c1 = val & 0x7f;
3768     if (p2) *p2 = c2;
3769     if (p1) *p1 = c1;
3770     return 0;
3771 }
3772
3773 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3774 {
3775     int shift = 20;
3776     c &= VALUE_MASK;
3777     while(shift >= 0){
3778         if(c >= 1<<shift){
3779             while(shift >= 0){
3780                 (*f)(0, bin2hex(c>>shift));
3781                 shift -= 4;
3782             }
3783         }else{
3784             shift -= 4;
3785         }
3786     }
3787     return;
3788 }
3789
3790 void encode_fallback_html(nkf_char c)
3791 {
3792     (*oconv)(0, '&');
3793     (*oconv)(0, '#');
3794     c &= VALUE_MASK;
3795     if(c >= NKF_INT32_C(1000000))
3796         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3797     if(c >= NKF_INT32_C(100000))
3798         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3799     if(c >= 10000)
3800         (*oconv)(0, 0x30+(c/10000  )%10);
3801     if(c >= 1000)
3802         (*oconv)(0, 0x30+(c/1000   )%10);
3803     if(c >= 100)
3804         (*oconv)(0, 0x30+(c/100    )%10);
3805     if(c >= 10)
3806         (*oconv)(0, 0x30+(c/10     )%10);
3807     if(c >= 0)
3808         (*oconv)(0, 0x30+ c         %10);
3809     (*oconv)(0, ';');
3810     return;
3811 }
3812
3813 void encode_fallback_xml(nkf_char c)
3814 {
3815     (*oconv)(0, '&');
3816     (*oconv)(0, '#');
3817     (*oconv)(0, 'x');
3818     nkf_each_char_to_hex(oconv, c);
3819     (*oconv)(0, ';');
3820     return;
3821 }
3822
3823 void encode_fallback_java(nkf_char c)
3824 {
3825     (*oconv)(0, '\\');
3826     c &= VALUE_MASK;
3827     if(!is_unicode_bmp(c)){
3828         (*oconv)(0, 'U');
3829         (*oconv)(0, '0');
3830         (*oconv)(0, '0');
3831         (*oconv)(0, bin2hex(c>>20));
3832         (*oconv)(0, bin2hex(c>>16));
3833     }else{
3834         (*oconv)(0, 'u');
3835     }
3836     (*oconv)(0, bin2hex(c>>12));
3837     (*oconv)(0, bin2hex(c>> 8));
3838     (*oconv)(0, bin2hex(c>> 4));
3839     (*oconv)(0, bin2hex(c    ));
3840     return;
3841 }
3842
3843 void encode_fallback_perl(nkf_char c)
3844 {
3845     (*oconv)(0, '\\');
3846     (*oconv)(0, 'x');
3847     (*oconv)(0, '{');
3848     nkf_each_char_to_hex(oconv, c);
3849     (*oconv)(0, '}');
3850     return;
3851 }
3852
3853 void encode_fallback_subchar(nkf_char c)
3854 {
3855     c = unicode_subchar;
3856     (*oconv)((c>>8)&0xFF, c&0xFF);
3857     return;
3858 }
3859 #endif
3860
3861 #ifdef UTF8_OUTPUT_ENABLE
3862 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3863 {
3864     const unsigned short *p;
3865
3866     if (c2 == JIS_X_0201) {
3867         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3868             switch (c1) {
3869             case 0x20:
3870                 return 0xA0;
3871             case 0x7D:
3872                 return 0xA9;
3873             }
3874         }
3875         p = euc_to_utf8_1byte;
3876 #ifdef X0212_ENABLE
3877     } else if (is_eucg3(c2)){
3878         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3879             return 0xA6;
3880         }
3881         c2 = (c2&0x7f) - 0x21;
3882         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3883             p = x0212_to_utf8_2bytes[c2];
3884         else
3885             return 0;
3886 #endif
3887     } else {
3888         c2 &= 0x7f;
3889         c2 = (c2&0x7f) - 0x21;
3890         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3891             p =
3892                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3893                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3894                 euc_to_utf8_2bytes_ms[c2];
3895         else
3896             return 0;
3897     }
3898     if (!p) return 0;
3899     c1 = (c1 & 0x7f) - 0x21;
3900     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3901         return p[c1];
3902     return 0;
3903 }
3904
3905 void w_oconv(nkf_char c2, nkf_char c1)
3906 {
3907     nkf_char c0;
3908     nkf_char val;
3909
3910     if (output_bom_f) {
3911         output_bom_f = FALSE;
3912         (*o_putc)('\357');
3913         (*o_putc)('\273');
3914         (*o_putc)('\277');
3915     }
3916
3917     if (c2 == EOF) {
3918         (*o_putc)(EOF);
3919         return;
3920     }
3921
3922 #ifdef NUMCHAR_OPTION
3923     if (c2 == 0 && is_unicode_capsule(c1)){
3924         val = c1 & VALUE_MASK;
3925         if (val < 0x80){
3926             (*o_putc)(val);
3927         }else if (val < 0x800){
3928             (*o_putc)(0xC0 | (val >> 6));
3929             (*o_putc)(0x80 | (val & 0x3f));
3930         } else if (val <= NKF_INT32_C(0xFFFF)) {
3931             (*o_putc)(0xE0 | (val >> 12));
3932             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3933             (*o_putc)(0x80 | (val        & 0x3f));
3934         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3935             (*o_putc)(0xF0 | ( val>>18));
3936             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3937             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3938             (*o_putc)(0x80 | ( val      & 0x3f));
3939         }
3940         return;
3941     }
3942 #endif
3943
3944     if (c2 == 0) {
3945         output_mode = ASCII;
3946         (*o_putc)(c1);
3947     } else if (c2 == ISO_8859_1) {
3948         output_mode = UTF_8;
3949         (*o_putc)(c1 | 0x080);
3950     } else {
3951         output_mode = UTF_8;
3952         val = e2w_conv(c2, c1);
3953         if (val){
3954             w16w_conv(val, &c2, &c1, &c0);
3955             (*o_putc)(c2);
3956             if (c1){
3957                 (*o_putc)(c1);
3958                 if (c0) (*o_putc)(c0);
3959             }
3960         }
3961     }
3962 }
3963
3964 void w_oconv16(nkf_char c2, nkf_char c1)
3965 {
3966     if (output_bom_f) {
3967         output_bom_f = FALSE;
3968         if (output_endian == ENDIAN_LITTLE){
3969             (*o_putc)((unsigned char)'\377');
3970             (*o_putc)('\376');
3971         }else{
3972             (*o_putc)('\376');
3973             (*o_putc)((unsigned char)'\377');
3974         }
3975     }
3976
3977     if (c2 == EOF) {
3978         (*o_putc)(EOF);
3979         return;
3980     }
3981
3982     if (c2 == ISO_8859_1) {
3983         c2 = 0;
3984         c1 |= 0x80;
3985 #ifdef NUMCHAR_OPTION
3986     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3987         if (is_unicode_bmp(c1)) {
3988             c2 = (c1 >> 8) & 0xff;
3989             c1 &= 0xff;
3990         } else {
3991             c1 &= VALUE_MASK;
3992             if (c1 <= UNICODE_MAX) {
3993                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3994                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3995                 if (output_endian == ENDIAN_LITTLE){
3996                     (*o_putc)(c2 & 0xff);
3997                     (*o_putc)((c2 >> 8) & 0xff);
3998                     (*o_putc)(c1 & 0xff);
3999                     (*o_putc)((c1 >> 8) & 0xff);
4000                 }else{
4001                     (*o_putc)((c2 >> 8) & 0xff);
4002                     (*o_putc)(c2 & 0xff);
4003                     (*o_putc)((c1 >> 8) & 0xff);
4004                     (*o_putc)(c1 & 0xff);
4005                 }
4006             }
4007             return;
4008         }
4009 #endif
4010     } else if (c2) {
4011         nkf_char val = e2w_conv(c2, c1);
4012         c2 = (val >> 8) & 0xff;
4013         c1 = val & 0xff;
4014         if (!val) return;
4015     }
4016     if (output_endian == ENDIAN_LITTLE){
4017         (*o_putc)(c1);
4018         (*o_putc)(c2);
4019     }else{
4020         (*o_putc)(c2);
4021         (*o_putc)(c1);
4022     }
4023 }
4024
4025 void w_oconv32(nkf_char c2, nkf_char c1)
4026 {
4027     if (output_bom_f) {
4028         output_bom_f = FALSE;
4029         if (output_endian == ENDIAN_LITTLE){
4030             (*o_putc)((unsigned char)'\377');
4031             (*o_putc)('\376');
4032             (*o_putc)('\000');
4033             (*o_putc)('\000');
4034         }else{
4035             (*o_putc)('\000');
4036             (*o_putc)('\000');
4037             (*o_putc)('\376');
4038             (*o_putc)((unsigned char)'\377');
4039         }
4040     }
4041
4042     if (c2 == EOF) {
4043         (*o_putc)(EOF);
4044         return;
4045     }
4046
4047     if (c2 == ISO_8859_1) {
4048         c1 |= 0x80;
4049 #ifdef NUMCHAR_OPTION
4050     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4051         c1 &= VALUE_MASK;
4052 #endif
4053     } else if (c2) {
4054         c1 = e2w_conv(c2, c1);
4055         if (!c1) return;
4056     }
4057     if (output_endian == ENDIAN_LITTLE){
4058         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4059         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4060         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4061         (*o_putc)('\000');
4062     }else{
4063         (*o_putc)('\000');
4064         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4065         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4066         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4067     }
4068 }
4069 #endif
4070
4071 void e_oconv(nkf_char c2, nkf_char c1)
4072 {
4073 #ifdef NUMCHAR_OPTION
4074     if (c2 == 0 && is_unicode_capsule(c1)){
4075         w16e_conv(c1, &c2, &c1);
4076         if (c2 == 0 && is_unicode_capsule(c1)){
4077             c2 = c1 & VALUE_MASK;
4078             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4079                 /* eucJP-ms UDC */
4080                 c1 &= 0xFFF;
4081                 c2 = c1 / 94;
4082                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4083                 c1 = 0x21 + c1 % 94;
4084                 if (is_eucg3(c2)){
4085                     (*o_putc)(0x8f);
4086                     (*o_putc)((c2 & 0x7f) | 0x080);
4087                     (*o_putc)(c1 | 0x080);
4088                 }else{
4089                     (*o_putc)((c2 & 0x7f) | 0x080);
4090                     (*o_putc)(c1 | 0x080);
4091                 }
4092                 return;
4093             } else {
4094                 if (encode_fallback) (*encode_fallback)(c1);
4095                 return;
4096             }
4097         }
4098     }
4099 #endif
4100     if (c2 == EOF) {
4101         (*o_putc)(EOF);
4102         return;
4103     } else if (c2 == 0) {
4104         output_mode = ASCII;
4105         (*o_putc)(c1);
4106     } else if (c2 == JIS_X_0201) {
4107         output_mode = EUC_JP;
4108         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4109     } else if (c2 == ISO_8859_1) {
4110         output_mode = ISO_8859_1;
4111         (*o_putc)(c1 | 0x080);
4112 #ifdef X0212_ENABLE
4113     } else if (is_eucg3(c2)){
4114         output_mode = EUC_JP;
4115 #ifdef SHIFTJIS_CP932
4116         if (!cp932inv_f){
4117             nkf_char s2, s1;
4118             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4119                 s2e_conv(s2, s1, &c2, &c1);
4120             }
4121         }
4122 #endif
4123         if (c2 == 0) {
4124             output_mode = ASCII;
4125             (*o_putc)(c1);
4126         }else if (is_eucg3(c2)){
4127             if (x0212_f){
4128                 (*o_putc)(0x8f);
4129                 (*o_putc)((c2 & 0x7f) | 0x080);
4130                 (*o_putc)(c1 | 0x080);
4131             }
4132         }else{
4133             (*o_putc)((c2 & 0x7f) | 0x080);
4134             (*o_putc)(c1 | 0x080);
4135         }
4136 #endif
4137     } else {
4138         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4139             set_iconv(FALSE, 0);
4140             return; /* too late to rescue this char */
4141         }
4142         output_mode = EUC_JP;
4143         (*o_putc)(c2 | 0x080);
4144         (*o_putc)(c1 | 0x080);
4145     }
4146 }
4147
4148 #ifdef X0212_ENABLE
4149 nkf_char x0212_shift(nkf_char c)
4150 {
4151     nkf_char ret = c;
4152     c &= 0x7f;
4153     if (is_eucg3(ret)){
4154         if (0x75 <= c && c <= 0x7f){
4155             ret = c + (0x109 - 0x75);
4156         }
4157     }else{
4158         if (0x75 <= c && c <= 0x7f){
4159             ret = c + (0x113 - 0x75);
4160         }
4161     }
4162     return ret;
4163 }
4164
4165
4166 nkf_char x0212_unshift(nkf_char c)
4167 {
4168     nkf_char ret = c;
4169     if (0x7f <= c && c <= 0x88){
4170         ret = c + (0x75 - 0x7f);
4171     }else if (0x89 <= c && c <= 0x92){
4172         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4173     }
4174     return ret;
4175 }
4176 #endif /* X0212_ENABLE */
4177
4178 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4179 {
4180     nkf_char ndx;
4181     if (is_eucg3(c2)){
4182         ndx = c2 & 0x7f;
4183         if (x0213_f){
4184             if((0x21 <= ndx && ndx <= 0x2F)){
4185                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4186                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4187                 return 0;
4188             }else if(0x6E <= ndx && ndx <= 0x7E){
4189                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4190                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4191                 return 0;
4192             }
4193             return 1;
4194         }
4195 #ifdef X0212_ENABLE
4196         else if(nkf_isgraph(ndx)){
4197             nkf_char val = 0;
4198             const unsigned short *ptr;
4199             ptr = x0212_shiftjis[ndx - 0x21];
4200             if (ptr){
4201                 val = ptr[(c1 & 0x7f) - 0x21];
4202             }
4203             if (val){
4204                 c2 = val >> 8;
4205                 c1 = val & 0xff;
4206                 if (p2) *p2 = c2;
4207                 if (p1) *p1 = c1;
4208                 return 0;
4209             }
4210             c2 = x0212_shift(c2);
4211         }
4212 #endif /* X0212_ENABLE */
4213     }
4214     if(0x7F < c2) return 1;
4215     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4216     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4217     return 0;
4218 }
4219
4220 void s_oconv(nkf_char c2, nkf_char c1)
4221 {
4222 #ifdef NUMCHAR_OPTION
4223     if (c2 == 0 && is_unicode_capsule(c1)){
4224         w16e_conv(c1, &c2, &c1);
4225         if (c2 == 0 && is_unicode_capsule(c1)){
4226             c2 = c1 & VALUE_MASK;
4227             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4228                 /* CP932 UDC */
4229                 c1 &= 0xFFF;
4230                 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
4231                 c1 = c1 % 188;
4232                 c1 += 0x40 + (c1 > 0x3e);
4233                 (*o_putc)(c2);
4234                 (*o_putc)(c1);
4235                 return;
4236             } else {
4237                 if(encode_fallback)(*encode_fallback)(c1);
4238                 return;
4239             }
4240         }
4241     }
4242 #endif
4243     if (c2 == EOF) {
4244         (*o_putc)(EOF);
4245         return;
4246     } else if (c2 == 0) {
4247         output_mode = ASCII;
4248         (*o_putc)(c1);
4249     } else if (c2 == JIS_X_0201) {
4250         output_mode = SHIFT_JIS;
4251         (*o_putc)(c1|0x80);
4252     } else if (c2 == ISO_8859_1) {
4253         output_mode = ISO_8859_1;
4254         (*o_putc)(c1 | 0x080);
4255 #ifdef X0212_ENABLE
4256     } else if (is_eucg3(c2)){
4257         output_mode = SHIFT_JIS;
4258         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4259             (*o_putc)(c2);
4260             (*o_putc)(c1);
4261         }
4262 #endif
4263     } else {
4264         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4265             set_iconv(FALSE, 0);
4266             return; /* too late to rescue this char */