OSDN Git Service

* canonical name of ASCII is US-ASCII.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * 現在、nkf は SourceForge にてメンテナンスが続けられています。
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.163 2008/01/11 00:45:06 naruse Exp $ */
/* Version number, release date and copyright banner, all as string literals.
 * COPY_RIGHT is two adjacent literals (joined by the compiler) with an
 * embedded newline between the two copyright lines. */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2008-01-11"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
/* Build-time defaults.
 * If none of DEFAULT_CODE_JIS / _SJIS / _EUC / _UTF8 has been defined before
 * this point, DEFAULT_CODE_JIS is defined; the chain is first-match, so it
 * does not reject builds that define more than one of them. */
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
/* Initial value of mime_f unless overridden by the build. */
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
/* Initial value of x0201_f unless overridden by the build. */
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
/*
 * Newline emission helpers, selected at build time by DEFAULT_NEWLINE:
 *   0x0D0A        -> CR followed by LF
 *   0x0D          -> CR
 *   anything else -> LF (also when DEFAULT_NEWLINE is not defined at all,
 *                    since an undefined name evaluates to 0 inside #if)
 *
 * PUT_NEWLINE(func)   invokes func(byte) once per newline byte.
 * OCONV_NEWLINE(func) invokes func(0, byte) once per newline byte.
 * The two-byte variants are wrapped in do { } while (0) so that they behave
 * as a single statement.
 */
#if DEFAULT_NEWLINE == 0x0D0A
#define PUT_NEWLINE(func) do {\
    func(0x0D);\
    func(0x0A);\
} while (0)
#define OCONV_NEWLINE(func) do {\
    func(0, 0x0D);\
    func(0, 0x0A);\
} while (0)
#elif DEFAULT_NEWLINE == 0x0D
#define PUT_NEWLINE(func) func(0x0D)
#define OCONV_NEWLINE(func) func(0, 0x0D)
#else
/* DEFAULT_NEWLINE may already be defined here with an unrecognised value or
 * a different spelling of 0x0A (e.g. 10 or 0x0a); drop it first so the
 * definition below is never an incompatible macro redefinition. */
#undef DEFAULT_NEWLINE
#define DEFAULT_NEWLINE 0x0A
#define PUT_NEWLINE(func) func(0x0A)
#define OCONV_NEWLINE(func) func(0, 0x0A)
#endif
/* Stream named by HELP_OUTPUT: stderr when HELP_OUTPUT_STDERR is defined,
 * stdout otherwise. */
75 #ifdef HELP_OUTPUT_STDERR
76 #define HELP_OUTPUT stderr
77 #else
78 #define HELP_OUTPUT stdout
79 #endif
80
/* Normalise platform macros: define MSDOS when any of the listed DOS /
 * Windows / OS/2 compiler or platform macros is set and MSDOS is not already
 * defined; inside that case, also define __WIN32__ when only __Win32__ or
 * _WIN32 is present. */
81 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
82 #define MSDOS
83 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
84 #define __WIN32__
85 #endif
86 #endif
87
/* The OVERWRITE feature is switched off for PERL_XS builds. */
88 #ifdef PERL_XS
89 #undef OVERWRITE
90 #endif
91
92 #ifndef PERL_XS
93 #include <stdio.h>
94 #endif
95
96 #include <stdlib.h>
97 #include <string.h>
98
99 #if defined(MSDOS) || defined(__OS2__)
100 #include <fcntl.h>
101 #include <io.h>
102 #if defined(_MSC_VER) || defined(__WATCOMC__)
103 #define mktemp _mktemp
104 #endif
105 #endif
106
/*
 * setbinmode(fp): put stream fp into binary mode on MSDOS-family builds.
 *   LSI C   -> fsetbin(fp)
 *   DJGPP   -> djgpp_setbinmode(fp)
 *   others  -> setmode(fileno(fp), O_BINARY)
 * When MSDOS is not defined the macro expands to nothing, so
 * "setbinmode(fp);" becomes an empty statement.
 */
107 #ifdef MSDOS
108 #ifdef LSI_C
109 #define setbinmode(fp) fsetbin(fp)
110 #elif defined(__DJGPP__)
111 #include <libc/dosio.h>
112 #define setbinmode(fp) djgpp_setbinmode(fp)
113 #else /* Microsoft C, Turbo C */
114 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
115 #endif
116 #else /* UNIX */
117 #define setbinmode(fp)
118 #endif
119
120 #if defined(__DJGPP__)
/*
 * Switch the file handle behind fp to binary mode (DJGPP builds only).
 *
 * Reads the handle's current entry in __file_handle_modes[], clears O_TEXT,
 * sets O_BINARY and stores the result with __file_handle_set().
 *
 * NOTE(review): __file_handle_modes / __file_handle_set are presumably the
 * DJGPP libc internals declared by <libc/dosio.h> — confirm.  The value
 * returned by fileno() is used as an array index without a check for -1.
 */
121 void  djgpp_setbinmode(FILE *fp)
122 {
123     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
124     int fd, m;
125     fd = fileno(fp);
126     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
127     __file_handle_set(fd, m);
128 }
129 #endif
130
/* setvbuffer(fp, buf, size): attach a fully-buffered I/O buffer to fp.
 * Uses setvbuf() with _IOFBF when that macro exists, setbuffer() otherwise. */
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
133 #else /* BSD */
134 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
135 #endif
136
137 /*Borland C++ 4.5 EasyWin*/
/* EASYWIN is defined for Turbo/Borland C targeting Windows without
 * __WIN32__; that configuration also gets __WIN16__ and <windows.h>. */
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
139 #define         EASYWIN
140 #ifndef __WIN16__
141 #define __WIN16__
142 #endif
143 #include <windows.h>
144 #endif
145
146 #ifdef OVERWRITE
147 /* added by satoru@isoternet.org */
148 #if defined(__EMX__)
149 #include <sys/types.h>
150 #endif
151 #include <sys/stat.h>
152 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
153 #include <unistd.h>
154 #if defined(__WATCOMC__)
155 #include <sys/utime.h>
156 #else
157 #include <utime.h>
158 #endif
159 #else /* defined(MSDOS) */
160 #ifdef __WIN32__
161 #ifdef __BORLANDC__ /* BCC32 */
162 #include <utime.h>
163 #else /* !defined(__BORLANDC__) */
164 #include <sys/utime.h>
165 #endif /* (__BORLANDC__) */
166 #else /* !defined(__WIN32__) */
167 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
168 #include <sys/utime.h>
169 #elif defined(__TURBOC__) /* BCC */
170 #include <utime.h>
171 #elif defined(LSI_C) /* LSI C */
172 #endif /* (__WIN32__) */
173 #endif
174 #endif
175 #endif
176
/* Boolean constants used for the int flags throughout this file. */
177 #define         FALSE   0
178 #define         TRUE    1
179
180 /* state of output_mode and input_mode
181
182    c2           0 means ASCII
183                 JIS_X_0201
184                 ISO_8859_1
185                 JIS_X_0208
186                 EOF      all termination
187    c1           32bit data
188
189  */
190
191 /* MIME ENCODE */
192
/* STRICT_MIME is the fallback value of MIME_DECODE_DEFAULT, which in turn
 * initialises mime_f.  NOTE(review): the use of FIXED_MIME is not visible in
 * this part of the file. */
193 #define         FIXED_MIME      7
194 #define         STRICT_MIME     8
195
196 /* byte order */
/* ENDIAN_BIG is the initial value of input_endian and output_endian.
 * NOTE(review): ENDIAN_2143 / ENDIAN_3412 presumably name mixed byte
 * permutations of a 4-byte unit — confirm against the UTF-32 reader. */
197 enum byte_order {
198     ENDIAN_BIG    = 1,
199     ENDIAN_LITTLE = 2,
200     ENDIAN_2143   = 3,
201     ENDIAN_3412   = 4
202 };
203
204 /* ASCII CODE */
205
/* Byte values of control and special characters.  BS..DEL are the standard
 * ASCII codes; CRLF packs CR and LF into one 16-bit value (the same value
 * DEFAULT_NEWLINE is compared against).
 * NOTE(review): SSP (0xa0), SSO (0x8e) and SS3 (0x8f) are 8-bit codes; SS3
 * is compared against the high byte in is_eucg3() — presumably 0x8e/0x8f are
 * the EUC single-shift bytes, confirm. */
206 #define         BS      0x08
207 #define         TAB     0x09
208 #define         LF      0x0a
209 #define         CR      0x0d
210 #define         ESC     0x1b
211 #define         SP      0x20
212 #define         AT      0x40
213 #define         SSP     0xa0
214 #define         DEL     0x7f
215 #define         SI      0x0f
216 #define         SO      0x0e
217 #define         SSO     0x8e
218 #define         SS3     0x8f
219 #define         CRLF    0x0D0A
220
221
222 /* encodings */
223
/*
 * Encoding identifiers.
 *
 * ASCII .. UTF_32LE_BOM take the implicit values 0..32, so
 * NKF_ENCODING_TABLE_SIZE evaluates to 33 (the number of identifiers before
 * it).  The JIS_X_* character-set identifiers are pinned to 0x1000..0x1004,
 * and BINARY, having no initialiser, follows as 0x1005.
 */
224 enum nkf_encodings {
225     ASCII,
226     ISO_8859_1,
227     ISO_2022_JP,
228     CP50220,
229     CP50221,
230     CP50222,
231     ISO_2022_JP_1,
232     ISO_2022_JP_3,
233     SHIFT_JIS,
234     WINDOWS_31J,
235     CP10001,
236     EUC_JP,
237     CP51932,
238     EUCJP_MS,
239     EUCJP_ASCII,
240     SHIFT_JISX0213,
241     SHIFT_JIS_2004,
242     EUC_JISX0213,
243     EUC_JIS_2004,
244     UTF_8,
245     UTF_8N,
246     UTF_8_BOM,
247     UTF8_MAC,
248     UTF_16,
249     UTF_16BE,
250     UTF_16BE_BOM,
251     UTF_16LE,
252     UTF_16LE_BOM,
253     UTF_32,
254     UTF_32BE,
255     UTF_32BE_BOM,
256     UTF_32LE,
257     UTF_32LE_BOM,
258     NKF_ENCODING_TABLE_SIZE,
259     JIS_X_0201=0x1000,
260     JIS_X_0208=0x1001,
261     JIS_X_0212=0x1002,
262     JIS_X_0213_1=0x1003,
263     JIS_X_0213_2=0x1004,
264     BINARY
265 };
266
/* Prototypes of the per-encoding converters referenced by the encoding
 * tables below.  The *_iconv functions take three nkf_char values
 * (c2, c1, c0) and return an nkf_char; the *_oconv functions take (c2, c1)
 * and return nothing.  Their definitions are outside this part of the file. */
267 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
268 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
269 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
270 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
271 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
272 void j_oconv(nkf_char c2, nkf_char c1);
273 void s_oconv(nkf_char c2, nkf_char c1);
274 void e_oconv(nkf_char c2, nkf_char c1);
275 void w_oconv(nkf_char c2, nkf_char c1);
276 void w_oconv16(nkf_char c2, nkf_char c1);
277 void w_oconv32(nkf_char c2, nkf_char c1);
278
/* A base ("native") encoding: a display name plus the input converter and
 * output converter used for it. */
279 typedef struct {
280     const char *name;
281     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
282     void (*oconv)(nkf_char c2, nkf_char c1);
283 } nkf_native_encoding;
284
/* The seven base encodings.  Note that ASCII shares e_iconv/e_oconv with
 * EUC-JP, and ISO-2022-JP pairs e_iconv (input) with j_oconv (output). */
285 nkf_native_encoding NkfEncodingASCII =          { "ASCII", e_iconv, e_oconv };
286 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
287 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
288 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
289 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
290 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
291 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
292
/* One concrete encoding: its enum nkf_encodings id, its canonical name and
 * the base encoding whose converters it uses. */
293 typedef struct {
294     const int id;
295     const char *name;
296     const nkf_native_encoding *base_encoding;
297 } nkf_encoding;
298
/*
 * Table of all concrete encodings.
 *
 * The first 33 rows are listed in the same order as the enumerators
 * ASCII..UTF_32LE_BOM, so for those the array index equals the id.  The
 * BINARY row (id 0x1005) sits at index 33, followed by the terminating
 * {-1, NULL, NULL} sentinel.
 */
299 nkf_encoding nkf_encoding_table[] = {
300     {ASCII,             "US-ASCII",             &NkfEncodingASCII},
301     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
302     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
303     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
304     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
305     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
306     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
307     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
308     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
309     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
310     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
311     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
312     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
313     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
314     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
315     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
316     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
317     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
318     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
319     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
320     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
321     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
322     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
323     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
324     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
325     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
326     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
327     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
328     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
329     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
330     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
331     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
332     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
333     {BINARY,            "BINARY",               &NkfEncodingASCII},
334     {-1,                NULL,                   NULL}
335 };
336
/*
 * Maps encoding names and aliases to enum nkf_encodings ids; terminated by
 * a {NULL, -1} sentinel.  Several names may map to the same id.
 *
 * Every name is spelled in upper case.  NOTE(review): presumably the lookup
 * upper-cases the requested name before comparing — confirm in the caller.
 * No entry maps to ISO_8859_1, although nkf_encoding_table has a row for it.
 */
337 struct {
338     const char *name;
339     const int id;
340 } encoding_name_to_id_table[] = {
341     {"US-ASCII",                ASCII},
342     {"ASCII",                   ASCII},
343     {"ISO-2022-JP",             ISO_2022_JP},
344     {"ISO2022JP-CP932",         CP50220},
345     {"CP50220",                 CP50220},
346     {"CP50221",                 CP50221},
347     {"CP50222",                 CP50222},
348     {"ISO-2022-JP-1",           ISO_2022_JP_1},
349     {"ISO-2022-JP-3",           ISO_2022_JP_3},
350     {"SHIFT_JIS",               SHIFT_JIS},
351     {"SJIS",                    SHIFT_JIS},
352     {"WINDOWS-31J",             WINDOWS_31J},
353     {"CSWINDOWS31J",            WINDOWS_31J},
354     {"CP932",                   WINDOWS_31J},
355     {"MS932",                   WINDOWS_31J},
356     {"CP10001",                 CP10001},
357     {"EUCJP",                   EUC_JP},
358     {"EUC-JP",                  EUC_JP},
359     {"CP51932",                 CP51932},
360     {"EUC-JP-MS",               EUCJP_MS},
361     {"EUCJP-MS",                EUCJP_MS},
362     {"EUCJPMS",                 EUCJP_MS},
363     {"EUC-JP-ASCII",            EUCJP_ASCII},
364     {"EUCJP-ASCII",             EUCJP_ASCII},
365     {"SHIFT_JISX0213",          SHIFT_JISX0213},
366     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
367     {"EUC-JISX0213",            EUC_JISX0213},
368     {"EUC-JIS-2004",            EUC_JIS_2004},
369     {"UTF-8",                   UTF_8},
370     {"UTF-8N",                  UTF_8N},
371     {"UTF-8-BOM",               UTF_8_BOM},
372     {"UTF8-MAC",                UTF8_MAC},
373     {"UTF-8-MAC",               UTF8_MAC},
374     {"UTF-16",                  UTF_16},
375     {"UTF-16BE",                UTF_16BE},
376     {"UTF-16BE-BOM",            UTF_16BE_BOM},
377     {"UTF-16LE",                UTF_16LE},
378     {"UTF-16LE-BOM",            UTF_16LE_BOM},
379     {"UTF-32",                  UTF_32},
380     {"UTF-32BE",                UTF_32BE},
381     {"UTF-32BE-BOM",            UTF_32BE_BOM},
382     {"UTF-32LE",                UTF_32LE},
383     {"UTF-32LE-BOM",            UTF_32LE_BOM},
384     {"BINARY",                  BINARY},
385     {NULL,                      -1}
386 };
/* DEFAULT_ENCODING: the enum nkf_encodings id matching whichever
 * DEFAULT_CODE_* macro is defined (the first match in this chain wins). */
387 #if defined(DEFAULT_CODE_JIS)
388 #define     DEFAULT_ENCODING ISO_2022_JP
389 #elif defined(DEFAULT_CODE_SJIS)
390 #define     DEFAULT_ENCODING SHIFT_JIS
391 #elif defined(DEFAULT_CODE_EUC)
392 #define     DEFAULT_ENCODING EUC_JP
393 #elif defined(DEFAULT_CODE_UTF8)
394 #define     DEFAULT_ENCODING UTF_8
395 #endif
396
397
/*
 * Locale-independent, ASCII-only character classification and conversion
 * macros.
 *
 * Every use of the macro parameter is parenthesised so that expression
 * arguments (e.g. "x | 1" or "a ? b : c") are evaluated as a unit.  The
 * argument is still evaluated more than once, so do not pass expressions
 * with side effects such as "*p++".
 */
#define is_alnum(c) \
    (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || ('0' <= (c) && (c) <= '9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isoctal(c)  ('0' <= (c) && (c) <= '7')
#define nkf_isdigit(c)  ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP <= (c) && (c) <= '~')
#define nkf_isgraph(c) ('!' <= (c) && (c) <= '~')
/* Value of a hexadecimal digit character; any other character yields 0. */
#define hex2bin(c) (('0' <= (c) && (c) <= '9') ? ((c) - '0') : \
                    ('A' <= (c) && (c) <= 'F') ? ((c) - 'A' + 10) : \
                    ('a' <= (c) && (c) <= 'f') ? ((c) - 'a' + 10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c) & 15])
/* True when bits 8..15 of c2 (after truncation to unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and for characters strictly between SP and DEL other
 * than ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
420
/*
 * Inclusive byte ranges for the CP932 tables.
 * NOTE(review): presumably these are Shift_JIS lead-byte ranges (0xFA..0xFC
 * for the forward table, 0xED..0xEE for the inverse table) — confirm against
 * the table definitions.
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END.  The parameter is
 * parenthesised so expression arguments are tested as a whole; it is
 * evaluated twice. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
426
/* Buffer sizes.  hold_buf is declared with HOLD_SIZE*2 bytes; stdibuf and
 * stdobuf with IOBUF_SIZE bytes each (smaller when INT_IS_SHORT is set). */
427 #define         HOLD_SIZE       1024
428 #if defined(INT_IS_SHORT)
429 #define         IOBUF_SIZE      2048
430 #else
431 #define         IOBUF_SIZE      16384
432 #endif
433
/* Initial values of kanji_intro and ascii_intro respectively. */
434 #define         DEFAULT_J       'B'
435 #define         DEFAULT_R       'B'
436
437 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
438 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
439
/* NOTE(review): GETA1/GETA2 presumably form the two-byte code (0x22, 0x2e)
 * of the GETA substitution mark mentioned at iso2022jp_f — confirm.  The use
 * of RANGE_NUM_MAX is not visible in this part of the file. */
440 #define         RANGE_NUM_MAX   18
441 #define         GETA1   0x22
442 #define         GETA2   0x2e
443
444
/* Numeric size constants, defined only when UTF-8 input or output support is
 * compiled in.  NOTE(review): presumably the element counts of the
 * like-named conversion tables from utf8tbl.h — confirm. */
445 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
446 #define sizeof_euc_to_utf8_1byte 94
447 #define sizeof_euc_to_utf8_2bytes 94
448 #define sizeof_utf8_to_euc_C2 64
449 #define sizeof_utf8_to_euc_E5B8 64
450 #define sizeof_utf8_to_euc_2bytes 112
451 #define sizeof_utf8_to_euc_3bytes 16
452 #endif
453
454 /* MIME preprocessor */
455
456 #ifdef EASYWIN /*Easy Win */
457 extern POINT _BufferSize;
458 #endif
459
/* Per-candidate state for one input encoding (see input_code_list):
 * a name, three nkf_char state values (stat, score, index), a three-element
 * nkf_char buffer, a status callback taking this struct and one nkf_char,
 * the encoding's input converter, and an int _file_stat. */
460 struct input_code{
461     char *name;
462     nkf_char stat;
463     nkf_char score;
464     nkf_char index;
465     nkf_char buf[3];
466     void (*status_func)(struct input_code *, nkf_char);
467     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
468     int _file_stat;
469 };
470
/* Currently selected input/output encodings; all start out unset (NULL). */
471 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
472 static nkf_encoding *input_encoding = NULL;
473 static nkf_encoding *output_encoding = NULL;
474
/*
 * Forward declarations of file-local (static) functions, interleaved with
 * the option state for Unicode handling.  The function definitions are
 * outside this part of the file.  Groups guarded by UTF8_INPUT_ENABLE /
 * UTF8_OUTPUT_ENABLE / PERL_XS / WIN32DLL exist only in the matching builds.
 */
475 #if !defined(PERL_XS) && !defined(WIN32DLL)
476 static  nkf_char     noconvert(FILE *f);
477 #endif
478 static  void    module_connection(void);
479 static  nkf_char     kanji_convert(FILE *f);
480 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
481 static  nkf_char     push_hold_buf(nkf_char c2);
482 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
483 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
484 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
485 /* UCS Mapping
486  * 0: Shift_JIS, eucJP-ascii
487  * 1: eucJP-ms
488  * 2: CP932, CP51932
489  * 3: CP10001
490  */
491 #define UCS_MAP_ASCII   0
492 #define UCS_MAP_MS      1
493 #define UCS_MAP_CP932   2
494 #define UCS_MAP_CP10001 3
495 static int ms_ucs_map_f = UCS_MAP_ASCII;
496 #endif
497 #ifdef UTF8_INPUT_ENABLE
498 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
499 static  int     no_cp932ext_f = FALSE;
500 /* ignore ZERO WIDTH NO-BREAK SPACE */
501 static  int     no_best_fit_chars_f = FALSE;
502 static  int     input_endian = ENDIAN_BIG;
503 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
504 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
505 static  void    encode_fallback_html(nkf_char c);
506 static  void    encode_fallback_xml(nkf_char c);
507 static  void    encode_fallback_java(nkf_char c);
508 static  void    encode_fallback_perl(nkf_char c);
509 static  void    encode_fallback_subchar(nkf_char c);
510 static  void    (*encode_fallback)(nkf_char c) = NULL;
511 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
512 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
513 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
514 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
515 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
516 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
517 static  void    w_status(struct input_code *, nkf_char);
518 #endif
519 #ifdef UTF8_OUTPUT_ENABLE
520 static  int     output_bom_f = FALSE;
521 static  int     output_endian = ENDIAN_BIG;
522 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
523 #endif
524 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
525 static  void    fold_conv(nkf_char c2,nkf_char c1);
526 static  void    nl_conv(nkf_char c2,nkf_char c1);
527 static  void    z_conv(nkf_char c2,nkf_char c1);
528 static  void    rot_conv(nkf_char c2,nkf_char c1);
529 static  void    hira_conv(nkf_char c2,nkf_char c1);
530 static  void    base64_conv(nkf_char c2,nkf_char c1);
531 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
532 static  void    no_connection(nkf_char c2,nkf_char c1);
533 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
534
535 static  void    code_score(struct input_code *ptr);
536 static  void    code_status(nkf_char c);
537
538 static  void    std_putc(nkf_char c);
539 static  nkf_char     std_getc(FILE *f);
540 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
541
542 static  nkf_char     broken_getc(FILE *f);
543 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
544
545 static  nkf_char     mime_begin(FILE *f);
546 static  nkf_char     mime_getc(FILE *f);
547 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
548
549 static  void    switch_mime_getc(void);
550 static  void    unswitch_mime_getc(void);
551 static  nkf_char     mime_begin_strict(FILE *f);
552 static  nkf_char     mime_getc_buf(FILE *f);
553 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
554 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
555
556 static  nkf_char     base64decode(nkf_char c);
557 static  void    mime_prechar(nkf_char c2, nkf_char c1);
558 static  void    mime_putc(nkf_char c);
559 static  void    open_mime(nkf_char c);
560 static  void    close_mime(void);
561 static  void    eof_mime(void);
562 static  void    mimeout_addchar(nkf_char c);
563 #ifndef PERL_XS
564 static  void    usage(void);
565 static  void    version(void);
566 static  void    show_configuration(void);
567 #endif
568 static  void    options(unsigned char *c);
569 static  void    reinit(void);
570
571 /* buffers */
572
/* stdibuf/stdobuf exist only in stand-alone builds (neither PERL_XS nor
 * WIN32DLL).  hold_buf is HOLD_SIZE*2 bytes; hold_count starts at 0. */
573 #if !defined(PERL_XS) && !defined(WIN32DLL)
574 static unsigned char   stdibuf[IOBUF_SIZE];
575 static unsigned char   stdobuf[IOBUF_SIZE];
576 #endif
577 static unsigned char   hold_buf[HOLD_SIZE*2];
578 static int             hold_count = 0;
579
580 /* MIME preprocessor fifo */
581
/* Fifo(n) indexes mime_buf with n reduced by MIME_BUF_MASK, so any unsigned
 * position wraps into the buffer; this relies on MIME_BUF_SIZE being a power
 * of two. */
582 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
583 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
584 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
585 static unsigned char           mime_buf[MIME_BUF_SIZE];
586 static unsigned int            mime_top = 0;
587 static unsigned int            mime_last = 0;  /* decoded */
588 static unsigned int            mime_input = 0; /* undecoded */
589 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
590
591 /* flags */
/* File-local option flags with their start-up defaults.  Only binmode_f,
 * mime_f (MIME_DECODE_DEFAULT) and x0201_f (X0201_DEFAULT) start non-FALSE
 * by default. */
592 static int             unbuf_f = FALSE;
593 static int             estab_f = FALSE;
594 static int             nop_f = FALSE;
595 static int             binmode_f = TRUE;       /* binary mode */
596 static int             rot_f = FALSE;          /* rot14/43 mode (sic) -- NOTE(review): presumably ROT13/ROT47, confirm */
597 static int             hira_f = FALSE;          /* hiragana/katakana conversion (henkan) */
598 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
599 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
600 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
601 static int             mimebuf_f = FALSE;      /* MIME buffered input */
602 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
603 static int             iso8859_f = FALSE;      /* ISO8859 through */
604 static int             mimeout_f = FALSE;       /* base64 mode */
605 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
606 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
607
/* Optional input filters.  Each one has an enable flag, a getc/ungetc pair
 * of function pointers for the stage it reads from (both initialised to
 * std_getc / std_ungetc), and prototypes for its own getc/ungetc. */
608 #ifdef UNICODE_NORMALIZATION
609 static int nfc_f = FALSE;
610 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
611 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
612 static nkf_char nfc_getc(FILE *f);
613 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
614 #endif
615
616 #ifdef INPUT_OPTION
617 static int cap_f = FALSE;
618 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
619 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
620 static nkf_char cap_getc(FILE *f);
621 static nkf_char cap_ungetc(nkf_char c,FILE *f);
622
623 static int url_f = FALSE;
624 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
625 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
626 static nkf_char url_getc(FILE *f);
627 static nkf_char url_ungetc(nkf_char c,FILE *f);
628 #endif
629
/*
 * 32-bit constants and the "Unicode capsule" tests.
 *
 * NKF_INT32_C(n) appends an L suffix when INT_IS_SHORT is defined, so that
 * the constants below are long there.
 *
 * A value is a Unicode capsule when its top byte (CLASS_MASK) equals
 * CLASS_UNICODE; the low 24 bits (VALUE_MASK) then carry the payload, which
 * is_unicode_bmp() compares against 0xFFFF.
 *
 * The macro parameter is parenthesised so that expression arguments such as
 * "base | offset" are masked as a whole.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
642
/* Optional numeric-character input filter: enable flag, getc/ungetc pointers
 * for the stage it reads from (initially std_getc / std_ungetc) and the
 * filter's own prototypes. */
643 #ifdef NUMCHAR_OPTION
644 static int numchar_f = FALSE;
645 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc */
646 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
647 static nkf_char numchar_getc(FILE *f);
648 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
649 #endif
650
/* Checking/debug support: noout_f and debug_f flags, their helper
 * prototypes, and a converter pointer that starts out null. */
651 #ifdef CHECK_OPTION
652 static int noout_f = FALSE;
653 static void no_putc(nkf_char c);
654 static int debug_f = FALSE;
655 static void debug(const char *str);
656 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
657 #endif
658
/* Guess mode, exec mode and character-set extension flags.  x0213_f is
 * declared unconditionally; the others depend on their feature macros. */
659 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
660 #if !defined PERL_XS
661 static  void    print_guessed_code(char *filename);
662 #endif
663 static  void    set_input_codename(char *codename);
664
665 #ifdef EXEC_IO
666 static int exec_f = 0;
667 #endif
668
669 #ifdef SHIFTJIS_CP932
670 /* invert IBM extended characters to others */
671 static int cp51932_f = FALSE;
672
673 /* invert NEC-selected IBM extended characters to IBM extended characters */
674 static int cp932inv_f = TRUE;
675
676 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
677 #endif /* SHIFTJIS_CP932 */
678
679 #ifdef X0212_ENABLE
680 static int x0212_f = FALSE;
681 static nkf_char x0212_shift(nkf_char c);
682 static nkf_char x0212_unshift(nkf_char c);
683 #endif
684 static int x0213_f = FALSE;
685
/* One byte per possible unsigned char value, zero-initialised as a static
 * object.  NOTE(review): how it is filled and consulted is not visible in
 * this part of the file. */
686 static unsigned char prefix_table[256];
687
/* Prototypes for the helpers that update a struct input_code, plus the
 * EUC-JP and Shift_JIS status callbacks installed in input_code_list. */
688 static void set_code_score(struct input_code *ptr, nkf_char score);
689 static void clr_code_score(struct input_code *ptr, nkf_char score);
690 static void status_disable(struct input_code *ptr);
691 static void status_push_ch(struct input_code *ptr, nkf_char c);
692 static void status_clear(struct input_code *ptr);
693 static void status_reset(struct input_code *ptr);
694 static void status_reinit(struct input_code *ptr);
695 static void status_check(struct input_code *ptr, nkf_char c);
696 static void e_status(struct input_code *, nkf_char);
697 static void s_status(struct input_code *, nkf_char);
698
/*
 * Candidate input encodings, each with zeroed state, its status callback and
 * its input converter.  EUC-JP and Shift_JIS are always present; the UTF-8,
 * UTF-16 and UTF-32 rows exist only with UTF8_INPUT_ENABLE, and the UTF-16 /
 * UTF-32 rows have no status callback (NULL).  The list ends with an
 * all-zero entry, i.e. one whose name is a null pointer.
 */
699 struct input_code input_code_list[] = {
700     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
701     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
702 #ifdef UTF8_INPUT_ENABLE
703     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
704     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
705     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
706 #endif
707     {0}
708 };
709
/* MIME output state; both counters start at 0. */
710 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
711 static int              base64_count = 0;
712
713 /* X0208 -> ASCII converter */
714
715 /* fold parameter */
716 static int             f_line = 0;    /* chars in line */
717 static int             f_prev = 0;
718 static int             fold_preserve_f = FALSE; /* preserve new lines */
719 static int             fold_f  = FALSE;
720 static int             fold_len  = 0;
721
722 /* options */
/* Both start as 'B' (DEFAULT_J / DEFAULT_R). */
723 static unsigned char   kanji_intro = DEFAULT_J;
724 static unsigned char   ascii_intro = DEFAULT_R;
725
726 /* Folding */
727
/* fold_margin starts at FOLD_MARGIN.  NOTE(review): DEFAULT_FOLD is
 * presumably the fold length used when none is given — its use is not
 * visible in this part of the file. */
728 #define FOLD_MARGIN  10
729 #define DEFAULT_FOLD 60
730
731 static int             fold_margin  = FOLD_MARGIN;
732
733 /* converters */
734
/* DEFAULT_CONV: the output converter matching the DEFAULT_CODE_* macro.
 * These are independent #ifdef blocks rather than an #elif chain, so
 * defining more than one DEFAULT_CODE_* macro would redefine DEFAULT_CONV. */
735 #ifdef DEFAULT_CODE_JIS
736 #   define  DEFAULT_CONV j_oconv
737 #endif
738 #ifdef DEFAULT_CODE_SJIS
739 #   define  DEFAULT_CONV s_oconv
740 #endif
741 #ifdef DEFAULT_CODE_EUC
742 #   define  DEFAULT_CONV e_oconv
743 #endif
744 #ifdef DEFAULT_CODE_UTF8
745 #   define  DEFAULT_CONV w_oconv
746 #endif
747
748 /* process default */
/* Converter hooks.  Until something installs real converters, input goes to
 * no_connection2 and every output stage to no_connection. */
749 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
750 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
751
752 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
753 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
754 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
755 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
756 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
757 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
758 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
759
760 /* static redirections */
761
/* Character I/O hooks; all default to the std_putc / std_getc / std_ungetc
 * functions declared earlier. */
762 static  void   (*o_putc)(nkf_char c) = std_putc;
763
764 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
765 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
766
767 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc */
768 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
769
770 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
771
772 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
773 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
774
775 /* for strict mime */
776 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
777 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
778
779 /* Global states */
780 static int output_mode = ASCII,    /* output kanji mode */
781            input_mode =  ASCII,    /* input kanji mode */
782            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
783 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
784
785 /* X0201 / X0208 conversion tables */
786
787 /* X0201 kana conversion table */
788 /* 90-9F A0-DF */
/* Layout: 65 two-byte pairs (130 bytes).  The first 64 pairs each start
 * with 0x21 or 0x25; the final pair is 0x00,0x00.
 * NOTE(review): 64 data pairs correspond to a 0xA0-0xDF span; the "90-9F"
 * part of the range note above has no entries of its own here — confirm
 * against the code that indexes this table. */
789 static const unsigned char cv[]= {
790     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
791     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
792     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
793     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
794     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
795     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
796     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
797     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
798     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
799     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
800     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
801     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
802     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
803     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
804     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
805     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
806     0x00,0x00};
807
808
809 /* X0201 kana conversion table for dakuten */
810 /* 90-9F A0-DF */
/* Same layout as cv[] (65 two-byte pairs, 130 bytes).  A 0x00,0x00 pair
 * means there is no entry at that position; every non-zero pair starts
 * with 0x25. */
811 static const unsigned char dv[]= {
812     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
817     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
818     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
819     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
820     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
821     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
822     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
823     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
824     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828     0x00,0x00};
829
/*
 * X0201 kana conversion table for handakuten (spelled "han-daguten" in
 * the original comment).
 * Index note kept from the original: 90-9F A0-DF.
 * 130 bytes, written here as 65 byte pairs; only five pairs are non-zero.
 * The number opening each row is the byte offset of the row's first value.
 */
static const unsigned char ev[] = {
    /*   0 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*   8 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  16 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  24 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  32 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  40 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  48 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  56 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  64 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  72 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  80 */ 0x00,0x00,  0x00,0x00,  0x25,0x51,  0x25,0x54,
    /*  88 */ 0x25,0x57,  0x25,0x5a,  0x25,0x5d,  0x00,0x00,
    /*  96 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 104 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 112 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 120 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 128 */ 0x00,0x00
};
850
851
/*
 * X0208 kigou (symbol) conversion table.
 * Index note kept from the original: 0x8140 - 0x819e.
 * 96 single-byte entries; 0x00 appears wherever no value is given.
 * The number opening each row is the byte offset of the row's first value.
 */
static const unsigned char fv[] = {
    /*  0 */ 0x00, 0x00, 0x00, 0x00, 0x2c, 0x2e, 0x00, 0x3a,
    /*  8 */ 0x3b, 0x3f, 0x21, 0x00, 0x00, 0x27, 0x60, 0x00,
    /* 16 */ 0x5e, 0x00, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 24 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x2f,
    /* 32 */ 0x5c, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x60, 0x27,
    /* 40 */ 0x22, 0x22, 0x28, 0x29, 0x00, 0x00, 0x5b, 0x5d,
    /* 48 */ 0x7b, 0x7d, 0x3c, 0x3e, 0x00, 0x00, 0x00, 0x00,
    /* 56 */ 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2d, 0x00, 0x00,
    /* 64 */ 0x00, 0x3d, 0x00, 0x3c, 0x3e, 0x00, 0x00, 0x00,
    /* 72 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 80 */ 0x24, 0x00, 0x00, 0x25, 0x23, 0x26, 0x2a, 0x40,
    /* 88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
869
870
871
872 static int             file_out_f = FALSE;
873 #ifdef OVERWRITE
874 static int             overwrite_f = FALSE;
875 static int             preserve_time_f = FALSE;
876 static int             backup_f = FALSE;
877 static char            *backup_suffix = "";
878 static char *get_backup_filename(const char *suffix, const char *filename);
879 #endif
880
881 static int nlmode_f = 0;   /* CR, LF, CRLF */
882 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
883 static nkf_char prev_cr = 0; /* CR or 0 */
884 #ifdef EASYWIN /*Easy Win */
885 static int             end_check;
886 #endif /*Easy Win */
887
888 #define STD_GC_BUFSIZE (256)
889 nkf_char std_gc_buf[STD_GC_BUFSIZE];
890 nkf_char std_gc_ndx;
891
/*
 * Return a freshly malloc()ed copy of str.
 *
 * If the allocation fails, perror() is called with str as its prefix and
 * the string literal "" is returned instead of NULL, so the result is
 * always a readable string (but is not always heap memory).
 */
char* nkf_strcpy(const char *str)
{
    size_t size = strlen(str) + 1;      /* characters plus the NUL */
    char *copy = malloc(size);

    if (copy == NULL) {
        perror(str);
        return "";
    }
    memcpy(copy, str, size);
    return copy;
}
902
/*
 * Copy src into dest, converting each character with nkf_toupper().
 *
 *   src    - NUL-terminated input string.
 *   dest   - output buffer.
 *   length - size of dest in bytes, including room for the terminating NUL
 *            (callers in this file pass a 32-byte array together with 32).
 *
 * At most length - 1 characters are copied and dest is always
 * NUL-terminated; longer input is truncated.  Nothing is written when
 * length is 0.  The previous version stored the terminator at
 * dest[length] when src had length or more characters, one byte past the
 * end of the buffer.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    size_t i = 0;

    if (length == 0) {
        return;
    }
    /* i + 1 < length keeps the last slot free for the terminator. */
    for (; i + 1 < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
911
912 static nkf_encoding *nkf_enc_from_index(int idx)
913 {
914     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
915         return 0;
916     }
917     return &nkf_encoding_table[idx];
918 }
919
920 static int nkf_enc_find_index(const char *name)
921 {
922     int i, index = -1;
923     if (*name == 'X' && *(name+1) == '-') name += 2;
924     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
925         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
926             return encoding_name_to_id_table[i].id;
927         }
928     }
929     return index;
930 }
931
932 static nkf_encoding *nkf_enc_find(const char *name)
933 {
934     int idx = -1;
935     idx = nkf_enc_find_index(name);
936     if (idx < 0) return 0;
937     return nkf_enc_from_index(idx);
938 }
939
/*
 * Accessors and predicates for a pointer to an encoding entry.
 * The argument is evaluated more than once by the predicate macros.
 */
#define nkf_enc_name(enc)               ((enc)->name)
#define nkf_enc_to_index(enc)           ((enc)->id)
#define nkf_enc_to_base_encoding(enc)   ((enc)->base_encoding)
#define nkf_enc_to_iconv(enc)           (nkf_enc_to_base_encoding(enc)->iconv)
#define nkf_enc_to_oconv(enc)           (nkf_enc_to_base_encoding(enc)->oconv)

/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
#define nkf_enc_asciicompat(enc) \
    (nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII \
     || nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)

/* True when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 bases. */
#define nkf_enc_unicode_p(enc) \
    (nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 \
     || nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 \
     || nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)

/* True when the encoding id is CP50220, CP50221 or CP50222. */
#define nkf_enc_cp5022x_p(enc) \
    (nkf_enc_to_index(enc) == CP50220 \
     || nkf_enc_to_index(enc) == CP50221 \
     || nkf_enc_to_index(enc) == CP50222)
956
957 #ifdef WIN32DLL
958 #include "nkf32dll.c"
959 #elif defined(PERL_XS)
960 #else /* WIN32DLL */
961 int main(int argc, char **argv)
962 {
963     FILE  *fin;
964     unsigned char  *cp;
965
966     char *outfname = NULL;
967     char *origfname;
968
969 #ifdef EASYWIN /*Easy Win */
970     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
971 #endif
972
973     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
974         cp = (unsigned char *)*argv;
975         options(cp);
976         if (guess_f) {
977 #ifdef CHECK_OPTION
978             int debug_f_back = debug_f;
979 #endif
980 #ifdef EXEC_IO
981             int exec_f_back = exec_f;
982 #endif
983 #ifdef X0212_ENABLE
984             int x0212_f_back = x0212_f;
985 #endif
986             int x0213_f_back = x0213_f;
987             int guess_f_back = guess_f;
988             reinit();
989             guess_f = guess_f_back;
990             mime_f = FALSE;
991 #ifdef CHECK_OPTION
992             debug_f = debug_f_back;
993 #endif
994 #ifdef EXEC_IO
995             exec_f = exec_f_back;
996 #endif
997 #ifdef X0212_ENABLE
998             x0212_f = x0212_f_back;
999 #endif
1000             x0213_f = x0213_f_back;
1001         }
1002 #ifdef EXEC_IO
1003         if (exec_f){
1004             int fds[2], pid;
1005             if (pipe(fds) < 0 || (pid = fork()) < 0){
1006                 abort();
1007             }
1008             if (pid == 0){
1009                 if (exec_f > 0){
1010                     close(fds[0]);
1011                     dup2(fds[1], 1);
1012                 }else{
1013                     close(fds[1]);
1014                     dup2(fds[0], 0);
1015                 }
1016                 execvp(argv[1], &argv[1]);
1017             }
1018             if (exec_f > 0){
1019                 close(fds[1]);
1020                 dup2(fds[0], 0);
1021             }else{
1022                 close(fds[0]);
1023                 dup2(fds[1], 1);
1024             }
1025             argc = 0;
1026             break;
1027         }
1028 #endif
1029     }
1030
1031     if (binmode_f == TRUE)
1032 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1033     if (freopen("","wb",stdout) == NULL)
1034         return (-1);
1035 #else
1036     setbinmode(stdout);
1037 #endif
1038
1039     if (unbuf_f)
1040       setbuf(stdout, (char *) NULL);
1041     else
1042       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1043
1044     if (argc == 0) {
1045       if (binmode_f == TRUE)
1046 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1047       if (freopen("","rb",stdin) == NULL) return (-1);
1048 #else
1049       setbinmode(stdin);
1050 #endif
1051       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1052       if (nop_f)
1053           noconvert(stdin);
1054       else {
1055           kanji_convert(stdin);
1056           if (guess_f) print_guessed_code(NULL);
1057       }
1058     } else {
1059       int nfiles = argc;
1060         int is_argument_error = FALSE;
1061       while (argc--) {
1062             input_codename = NULL;
1063             input_newline = 0;
1064 #ifdef CHECK_OPTION
1065             iconv_for_check = 0;
1066 #endif
1067           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1068                 perror(*(argv-1));
1069                 is_argument_error = TRUE;
1070                 continue;
1071           } else {
1072 #ifdef OVERWRITE
1073               int fd = 0;
1074               int fd_backup = 0;
1075 #endif
1076
1077 /* reopen file for stdout */
1078               if (file_out_f == TRUE) {
1079 #ifdef OVERWRITE
1080                   if (overwrite_f){
1081                       outfname = malloc(strlen(origfname)
1082                                         + strlen(".nkftmpXXXXXX")
1083                                         + 1);
1084                       if (!outfname){
1085                           perror(origfname);
1086                           return -1;
1087                       }
1088                       strcpy(outfname, origfname);
1089 #ifdef MSDOS
1090                       {
1091                           int i;
1092                           for (i = strlen(outfname); i; --i){
1093                               if (outfname[i - 1] == '/'
1094                                   || outfname[i - 1] == '\\'){
1095                                   break;
1096                               }
1097                           }
1098                           outfname[i] = '\0';
1099                       }
1100                       strcat(outfname, "ntXXXXXX");
1101                       mktemp(outfname);
1102                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1103                                 S_IREAD | S_IWRITE);
1104 #else
1105                       strcat(outfname, ".nkftmpXXXXXX");
1106                       fd = mkstemp(outfname);
1107 #endif
1108                       if (fd < 0
1109                           || (fd_backup = dup(fileno(stdout))) < 0
1110                           || dup2(fd, fileno(stdout)) < 0
1111                           ){
1112                           perror(origfname);
1113                           return -1;
1114                       }
1115                   }else
1116 #endif
1117                   if(argc == 1) {
1118                       outfname = *argv++;
1119                       argc--;
1120                   } else {
1121                       outfname = "nkf.out";
1122                   }
1123
1124                   if(freopen(outfname, "w", stdout) == NULL) {
1125                       perror (outfname);
1126                       return (-1);
1127                   }
1128                   if (binmode_f == TRUE) {
1129 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1130                       if (freopen("","wb",stdout) == NULL)
1131                            return (-1);
1132 #else
1133                       setbinmode(stdout);
1134 #endif
1135                   }
1136               }
1137               if (binmode_f == TRUE)
1138 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1139                  if (freopen("","rb",fin) == NULL)
1140                     return (-1);
1141 #else
1142                  setbinmode(fin);
1143 #endif
1144               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1145               if (nop_f)
1146                   noconvert(fin);
1147               else {
1148                   char *filename = NULL;
1149                   kanji_convert(fin);
1150                   if (nfiles > 1) filename = origfname;
1151                   if (guess_f) print_guessed_code(filename);
1152               }
1153               fclose(fin);
1154 #ifdef OVERWRITE
1155               if (overwrite_f) {
1156                   struct stat     sb;
1157 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1158                   time_t tb[2];
1159 #else
1160                   struct utimbuf  tb;
1161 #endif
1162
1163                   fflush(stdout);
1164                   close(fd);
1165                   if (dup2(fd_backup, fileno(stdout)) < 0){
1166                       perror("dup2");
1167                   }
1168                   if (stat(origfname, &sb)) {
1169                       fprintf(stderr, "Can't stat %s\n", origfname);
1170                   }
1171                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1172                   if (chmod(outfname, sb.st_mode)) {
1173                       fprintf(stderr, "Can't set permission %s\n", outfname);
1174                   }
1175
1176                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1177                     if(preserve_time_f){
1178 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1179                         tb[0] = tb[1] = sb.st_mtime;
1180                         if (utime(outfname, tb)) {
1181                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1182                         }
1183 #else
1184                         tb.actime  = sb.st_atime;
1185                         tb.modtime = sb.st_mtime;
1186                         if (utime(outfname, &tb)) {
1187                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1188                         }
1189 #endif
1190                     }
1191                     if(backup_f){
1192                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1193 #ifdef MSDOS
1194                         unlink(backup_filename);
1195 #endif
1196                         if (rename(origfname, backup_filename)) {
1197                             perror(backup_filename);
1198                             fprintf(stderr, "Can't rename %s to %s\n",
1199                                     origfname, backup_filename);
1200                         }
1201                     }else{
1202 #ifdef MSDOS
1203                         if (unlink(origfname)){
1204                             perror(origfname);
1205                         }
1206 #endif
1207                     }
1208                   if (rename(outfname, origfname)) {
1209                       perror(origfname);
1210                       fprintf(stderr, "Can't rename %s to %s\n",
1211                               outfname, origfname);
1212                   }
1213                   free(outfname);
1214               }
1215 #endif
1216           }
1217       }
1218         if (is_argument_error)
1219             return(-1);
1220     }
1221 #ifdef EASYWIN /*Easy Win */
1222     if (file_out_f == FALSE)
1223         scanf("%d",&end_check);
1224     else
1225         fclose(stdout);
1226 #else /* for Other OS */
1227     if (file_out_f == TRUE)
1228         fclose(stdout);
1229 #endif /*Easy Win */
1230     return (0);
1231 }
1232 #endif /* WIN32DLL */
1233
1234 #ifdef OVERWRITE
/*
 * Build the name of the backup file for filename.
 *
 *   suffix   - backup pattern.  Every '*' in it is replaced by filename;
 *              if it contains no '*', it is appended to filename.
 *   filename - name of the file being backed up.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL (after a perror() message) if memory could not be
 * allocated.
 *
 * The no-asterisk branch used to call malloc( + 1), i.e. allocate a
 * single byte, and then copy filename and suffix into it; it now
 * allocates the full length and checks the result.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
        /* Plain suffix: result is filename immediately followed by suffix. */
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* + 1 copies the suffix's terminating NUL as well. */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /* Each '*' (one character) expands to filename_length characters. */
    backup_filename = malloc((suffix_length - asterisk_count)
                             + asterisk_count * filename_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    for (i = 0, j = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1273 #endif
1274
/*
 * Long ("--name") options, searched in order by options().
 *
 *   name  - option text after "--"; a trailing '=' marks an option that
 *           takes a value.
 *   alias - when non-empty, options() parses this string as short option
 *           letters; when empty, options() handles the option by
 *           comparing name.
 *
 * The order of the entries is significant because the first matching
 * entry is used.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=",                 ""},
    {"oc=",                 ""},
    {"base64",              "jMB"},
    {"euc",                 "e"},
    {"euc-input",           "E"},
    {"fj",                  "jm"},
    {"help",                "v"},
    {"jis",                 "j"},
    {"jis-input",           "J"},
    {"mac",                 "sLm"},
    {"mime",                "jM"},
    {"mime-input",          "m"},
    {"msdos",               "sLw"},
    {"sjis",                "s"},
    {"sjis-input",          "S"},
    {"unix",                "eLu"},
    {"version",             "V"},
    {"windows",             "sLw"},
    {"hiragana",            "h1"},
    {"katakana",            "h2"},
    {"katakana-hiragana",   "h3"},
    {"guess=",              ""},
    {"guess",               "g2"},
    {"cp932",               ""},
    {"no-cp932",            ""},
#ifdef X0212_ENABLE
    {"x0212",               ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8",                "w"},
    {"utf16",               "w16"},
    {"ms-ucs-map",          ""},
    {"fb-skip",             ""},
    {"fb-html",             ""},
    {"fb-xml",              ""},
    {"fb-perl",             ""},
    {"fb-java",             ""},
    {"fb-subchar",          ""},
    {"fb-subchar=",         ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input",          "W"},
    {"utf16-input",         "W16"},
    {"no-cp932ext",         ""},
    {"no-best-fit-chars",   ""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input",       ""},
#endif
#ifdef OVERWRITE
    {"overwrite",           ""},
    {"overwrite=",          ""},
    {"in-place",            ""},
    {"in-place=",           ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input",           ""},
    {"url-input",           ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input",       ""},
#endif
#ifdef CHECK_OPTION
    {"no-output",           ""},
    {"debug",               ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv",            ""},
#endif
#ifdef EXEC_IO
    {"exec-in",             ""},
    {"exec-out",            ""},
#endif
    {"prefix=",             ""},
};

/* Set to 1 by options() when it meets a bare "--"; from then on
 * options() returns immediately without parsing its argument. */
static int option_mode = 0;
1356
1357 void options(unsigned char *cp)
1358 {
1359     nkf_char i, j;
1360     unsigned char *p;
1361     unsigned char *cp_back = NULL;
1362     char codeset[32];
1363     nkf_encoding *enc;
1364
1365     if (option_mode==1)
1366         return;
1367     while(*cp && *cp++!='-');
1368     while (*cp || cp_back) {
1369         if(!*cp){
1370             cp = cp_back;
1371             cp_back = NULL;
1372             continue;
1373         }
1374         p = 0;
1375         switch (*cp++) {
1376         case '-':  /* literal options */
1377             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1378                 option_mode = 1;
1379                 return;
1380             }
1381             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1382                 p = (unsigned char *)long_option[i].name;
1383                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1384                 if (*p == cp[j] || cp[j] == SP){
1385                     p = &cp[j] + 1;
1386                     break;
1387                 }
1388                 p = 0;
1389             }
1390             if (p == 0) {
1391                 fprintf(stderr, "unknown long option: --%s\n", cp);
1392                 return;
1393             }
1394             while(*cp && *cp != SP && cp++);
1395             if (long_option[i].alias[0]){
1396                 cp_back = cp;
1397                 cp = (unsigned char *)long_option[i].alias;
1398             }else{
1399                 if (strcmp(long_option[i].name, "ic=") == 0){
1400                     nkf_str_upcase((char *)p, codeset, 32);
1401                     enc = nkf_enc_find(codeset);
1402                     if (!enc) continue;
1403                     input_encoding = enc;
1404                     switch (nkf_enc_to_index(input_encoding)) {
1405                     case CP50220:
1406                     case CP50221:
1407                     case CP50222:
1408 #ifdef SHIFTJIS_CP932
1409                         cp51932_f = TRUE;
1410 #endif
1411 #ifdef UTF8_OUTPUT_ENABLE
1412                         ms_ucs_map_f = UCS_MAP_CP932;
1413 #endif
1414                         break;
1415                     case ISO_2022_JP_1:
1416 #ifdef X0212_ENABLE
1417                         x0212_f = TRUE;
1418 #endif
1419                         break;
1420                     case ISO_2022_JP_3:
1421 #ifdef X0212_ENABLE
1422                         x0212_f = TRUE;
1423 #endif
1424                         x0213_f = TRUE;
1425                         break;
1426                     case WINDOWS_31J:
1427 #ifdef SHIFTJIS_CP932
1428                         cp51932_f = TRUE;
1429 #endif
1430 #ifdef UTF8_OUTPUT_ENABLE
1431                         ms_ucs_map_f = UCS_MAP_CP932;
1432 #endif
1433                         break;
1434                     case CP10001:
1435 #ifdef SHIFTJIS_CP932
1436                         cp51932_f = TRUE;
1437 #endif
1438 #ifdef UTF8_OUTPUT_ENABLE
1439                         ms_ucs_map_f = UCS_MAP_CP10001;
1440 #endif
1441                         break;
1442                     case CP51932:
1443 #ifdef SHIFTJIS_CP932
1444                         cp51932_f = TRUE;
1445 #endif
1446 #ifdef UTF8_OUTPUT_ENABLE
1447                         ms_ucs_map_f = UCS_MAP_CP932;
1448 #endif
1449                         break;
1450                     case EUCJP_MS:
1451 #ifdef SHIFTJIS_CP932
1452                         cp51932_f = FALSE;
1453 #endif
1454 #ifdef UTF8_OUTPUT_ENABLE
1455                         ms_ucs_map_f = UCS_MAP_MS;
1456 #endif
1457                         break;
1458                     case EUCJP_ASCII:
1459 #ifdef SHIFTJIS_CP932
1460                         cp51932_f = FALSE;
1461 #endif
1462 #ifdef UTF8_OUTPUT_ENABLE
1463                         ms_ucs_map_f = UCS_MAP_ASCII;
1464 #endif
1465                         break;
1466                     case SHIFT_JISX0213:
1467                     case SHIFT_JIS_2004:
1468                         x0213_f = TRUE;
1469 #ifdef SHIFTJIS_CP932
1470                         cp51932_f = FALSE;
1471 #endif
1472                         break;
1473                     case EUC_JISX0213:
1474                     case EUC_JIS_2004:
1475                         x0213_f = TRUE;
1476 #ifdef SHIFTJIS_CP932
1477                         cp51932_f = FALSE;
1478 #endif
1479                         break;
1480 #ifdef UTF8_INPUT_ENABLE
1481 #ifdef UNICODE_NORMALIZATION
1482                     case UTF8_MAC:
1483                         nfc_f = TRUE;
1484                         break;
1485 #endif
1486                     case UTF_16:
1487                     case UTF_16BE:
1488                     case UTF_16BE_BOM:
1489                         input_endian = ENDIAN_BIG;
1490                         break;
1491                     case UTF_16LE:
1492                     case UTF_16LE_BOM:
1493                         input_endian = ENDIAN_LITTLE;
1494                         break;
1495                     case UTF_32:
1496                     case UTF_32BE:
1497                     case UTF_32BE_BOM:
1498                         input_endian = ENDIAN_BIG;
1499                         break;
1500                     case UTF_32LE:
1501                     case UTF_32LE_BOM:
1502                         input_endian = ENDIAN_LITTLE;
1503                         break;
1504 #endif
1505                     }
1506                     continue;
1507                 }
1508                 if (strcmp(long_option[i].name, "oc=") == 0){
1509                     x0201_f = FALSE;
1510                     nkf_str_upcase((char *)p, codeset, 32);
1511                     enc = nkf_enc_find(codeset);
1512                     if (enc <= 0) continue;
1513                     output_encoding = enc;
1514                     switch (nkf_enc_to_index(output_encoding)) {
1515                     case CP50220:
1516                         x0201_f = TRUE;
1517 #ifdef SHIFTJIS_CP932
1518                         cp932inv_f = FALSE;
1519 #endif
1520 #ifdef UTF8_OUTPUT_ENABLE
1521                         ms_ucs_map_f = UCS_MAP_CP932;
1522 #endif
1523                         break;
1524                     case CP50221:
1525 #ifdef SHIFTJIS_CP932
1526                         cp932inv_f = FALSE;
1527 #endif
1528 #ifdef UTF8_OUTPUT_ENABLE
1529                         ms_ucs_map_f = UCS_MAP_CP932;
1530 #endif
1531                         break;
1532                     case ISO_2022_JP_1:
1533 #ifdef X0212_ENABLE
1534                         x0212_f = TRUE;
1535 #endif
1536 #ifdef SHIFTJIS_CP932
1537                         cp932inv_f = FALSE;
1538 #endif
1539                         break;
1540                     case ISO_2022_JP_3:
1541 #ifdef X0212_ENABLE
1542                         x0212_f = TRUE;
1543 #endif
1544                         x0213_f = TRUE;
1545 #ifdef SHIFTJIS_CP932
1546                         cp932inv_f = FALSE;
1547 #endif
1548                         break;
1549                     case WINDOWS_31J:
1550 #ifdef UTF8_OUTPUT_ENABLE
1551                         ms_ucs_map_f = UCS_MAP_CP932;
1552 #endif
1553                         break;
1554                     case CP10001:
1555 #ifdef UTF8_OUTPUT_ENABLE
1556                         ms_ucs_map_f = UCS_MAP_CP10001;
1557 #endif
1558                         break;
1559                     case CP51932:
1560 #ifdef SHIFTJIS_CP932
1561                         cp932inv_f = FALSE;
1562 #endif
1563 #ifdef UTF8_OUTPUT_ENABLE
1564                         ms_ucs_map_f = UCS_MAP_CP932;
1565 #endif
1566                         break;
1567                     case EUCJP_MS:
1568 #ifdef X0212_ENABLE
1569                         x0212_f = TRUE;
1570 #endif
1571 #ifdef UTF8_OUTPUT_ENABLE
1572                         ms_ucs_map_f = UCS_MAP_MS;
1573 #endif
1574                         break;
1575                     case EUCJP_ASCII:
1576 #ifdef X0212_ENABLE
1577                         x0212_f = TRUE;
1578 #endif
1579 #ifdef UTF8_OUTPUT_ENABLE
1580                         ms_ucs_map_f = UCS_MAP_ASCII;
1581 #endif
1582                         break;
1583                     case SHIFT_JISX0213:
1584                     case SHIFT_JIS_2004:
1585                         x0213_f = TRUE;
1586 #ifdef SHIFTJIS_CP932
1587                         cp932inv_f = FALSE;
1588 #endif
1589                         break;
1590                     case EUC_JISX0213:
1591                     case EUC_JIS_2004:
1592 #ifdef X0212_ENABLE
1593                         x0212_f = TRUE;
1594 #endif
1595                         x0213_f = TRUE;
1596 #ifdef SHIFTJIS_CP932
1597                         cp932inv_f = FALSE;
1598 #endif
1599                         break;
1600 #ifdef UTF8_OUTPUT_ENABLE
1601                     case UTF_8_BOM:
1602                         output_bom_f = TRUE;
1603                         break;
1604                     case UTF_16:
1605                     case UTF_16BE_BOM:
1606                         output_bom_f = TRUE;
1607                         break;
1608                     case UTF_16LE:
1609                         output_endian = ENDIAN_LITTLE;
1610                         output_bom_f = FALSE;
1611                         break;
1612                     case UTF_16LE_BOM:
1613                         output_endian = ENDIAN_LITTLE;
1614                         output_bom_f = TRUE;
1615                         break;
1616                     case UTF_32BE_BOM:
1617                         output_bom_f = TRUE;
1618                         break;
1619                     case UTF_32LE:
1620                         output_endian = ENDIAN_LITTLE;
1621                         output_bom_f = FALSE;
1622                         break;
1623                     case UTF_32LE_BOM:
1624                         output_endian = ENDIAN_LITTLE;
1625                         output_bom_f = TRUE;
1626                         break;
1627 #endif
1628                     }
1629                     continue;
1630                 }
1631                 if (strcmp(long_option[i].name, "guess=") == 0){
1632                     if (p[0] == '0' || p[0] == '1') {
1633                         guess_f = 1;
1634                     } else {
1635                         guess_f = 2;
1636                     }
1637                     continue;
1638                 }
1639 #ifdef OVERWRITE
1640                 if (strcmp(long_option[i].name, "overwrite") == 0){
1641                     file_out_f = TRUE;
1642                     overwrite_f = TRUE;
1643                     preserve_time_f = TRUE;
1644                     continue;
1645                 }
1646                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1647                     file_out_f = TRUE;
1648                     overwrite_f = TRUE;
1649                     preserve_time_f = TRUE;
1650                     backup_f = TRUE;
1651                     backup_suffix = malloc(strlen((char *) p) + 1);
1652                     strcpy(backup_suffix, (char *) p);
1653                     continue;
1654                 }
1655                 if (strcmp(long_option[i].name, "in-place") == 0){
1656                     file_out_f = TRUE;
1657                     overwrite_f = TRUE;
1658                     preserve_time_f = FALSE;
1659                     continue;
1660                 }
1661                 if (strcmp(long_option[i].name, "in-place=") == 0){
1662                     file_out_f = TRUE;
1663                     overwrite_f = TRUE;
1664                     preserve_time_f = FALSE;
1665                     backup_f = TRUE;
1666                     backup_suffix = malloc(strlen((char *) p) + 1);
1667                     strcpy(backup_suffix, (char *) p);
1668                     continue;
1669                 }
1670 #endif
1671 #ifdef INPUT_OPTION
1672                 if (strcmp(long_option[i].name, "cap-input") == 0){
1673                     cap_f = TRUE;
1674                     continue;
1675                 }
1676                 if (strcmp(long_option[i].name, "url-input") == 0){
1677                     url_f = TRUE;
1678                     continue;
1679                 }
1680 #endif
1681 #ifdef NUMCHAR_OPTION
1682                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1683                     numchar_f = TRUE;
1684                     continue;
1685                 }
1686 #endif
1687 #ifdef CHECK_OPTION
1688                 if (strcmp(long_option[i].name, "no-output") == 0){
1689                     noout_f = TRUE;
1690                     continue;
1691                 }
1692                 if (strcmp(long_option[i].name, "debug") == 0){
1693                     debug_f = TRUE;
1694                     continue;
1695                 }
1696 #endif
1697                 if (strcmp(long_option[i].name, "cp932") == 0){
1698 #ifdef SHIFTJIS_CP932
1699                     cp51932_f = TRUE;
1700                     cp932inv_f = TRUE;
1701 #endif
1702 #ifdef UTF8_OUTPUT_ENABLE
1703                     ms_ucs_map_f = UCS_MAP_CP932;
1704 #endif
1705                     continue;
1706                 }
1707                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1708 #ifdef SHIFTJIS_CP932
1709                     cp51932_f = FALSE;
1710                     cp932inv_f = FALSE;
1711 #endif
1712 #ifdef UTF8_OUTPUT_ENABLE
1713                     ms_ucs_map_f = UCS_MAP_ASCII;
1714 #endif
1715                     continue;
1716                 }
1717 #ifdef SHIFTJIS_CP932
1718                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1719                     cp932inv_f = TRUE;
1720                     continue;
1721                 }
1722 #endif
1723
1724 #ifdef X0212_ENABLE
1725                 if (strcmp(long_option[i].name, "x0212") == 0){
1726                     x0212_f = TRUE;
1727                     continue;
1728                 }
1729 #endif
1730
1731 #ifdef EXEC_IO
1732                   if (strcmp(long_option[i].name, "exec-in") == 0){
1733                       exec_f = 1;
1734                       return;
1735                   }
1736                   if (strcmp(long_option[i].name, "exec-out") == 0){
1737                       exec_f = -1;
1738                       return;
1739                   }
1740 #endif
1741 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1742                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1743                     no_cp932ext_f = TRUE;
1744                     continue;
1745                 }
1746                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1747                     no_best_fit_chars_f = TRUE;
1748                     continue;
1749                 }
1750                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1751                     encode_fallback = NULL;
1752                     continue;
1753                 }
1754                 if (strcmp(long_option[i].name, "fb-html") == 0){
1755                     encode_fallback = encode_fallback_html;
1756                     continue;
1757                 }
1758                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1759                     encode_fallback = encode_fallback_xml;
1760                     continue;
1761                 }
1762                 if (strcmp(long_option[i].name, "fb-java") == 0){
1763                     encode_fallback = encode_fallback_java;
1764                     continue;
1765                 }
1766                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1767                     encode_fallback = encode_fallback_perl;
1768                     continue;
1769                 }
1770                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1771                     encode_fallback = encode_fallback_subchar;
1772                     continue;
1773                 }
1774                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1775                     encode_fallback = encode_fallback_subchar;
1776                     unicode_subchar = 0;
1777                     if (p[0] != '0'){
1778                         /* decimal number */
1779                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1780                             unicode_subchar *= 10;
1781                             unicode_subchar += hex2bin(p[i]);
1782                         }
1783                     }else if(p[1] == 'x' || p[1] == 'X'){
1784                         /* hexadecimal number */
1785                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1786                             unicode_subchar <<= 4;
1787                             unicode_subchar |= hex2bin(p[i]);
1788                         }
1789                     }else{
1790                         /* octal number */
1791                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1792                             unicode_subchar *= 8;
1793                             unicode_subchar += hex2bin(p[i]);
1794                         }
1795                     }
1796                     w16e_conv(unicode_subchar, &i, &j);
1797                     unicode_subchar = i<<8 | j;
1798                     continue;
1799                 }
1800 #endif
1801 #ifdef UTF8_OUTPUT_ENABLE
1802                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1803                     ms_ucs_map_f = UCS_MAP_MS;
1804                     continue;
1805                 }
1806 #endif
1807 #ifdef UNICODE_NORMALIZATION
1808                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1809                     nfc_f = TRUE;
1810                     continue;
1811                 }
1812 #endif
1813                 if (strcmp(long_option[i].name, "prefix=") == 0){
1814                     if (nkf_isgraph(p[0])){
1815                         for (i = 1; nkf_isgraph(p[i]); i++){
1816                             prefix_table[p[i]] = p[0];
1817                         }
1818                     }
1819                     continue;
1820                 }
1821             }
1822             continue;
1823         case 'b':           /* buffered mode */
1824             unbuf_f = FALSE;
1825             continue;
1826         case 'u':           /* non bufferd mode */
1827             unbuf_f = TRUE;
1828             continue;
1829         case 't':           /* transparent mode */
1830             if (*cp=='1') {
1831                 /* alias of -t */
1832                 cp++;
1833                 nop_f = TRUE;
1834             } else if (*cp=='2') {
1835                 /*
1836                  * -t with put/get
1837                  *
1838                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1839                  *
1840                  */
1841                 cp++;
1842                 nop_f = 2;
1843             } else
1844                 nop_f = TRUE;
1845             continue;
1846         case 'j':           /* JIS output */
1847         case 'n':
1848             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1849             continue;
1850         case 'e':           /* AT&T EUC output */
1851             cp932inv_f = FALSE;
1852             output_encoding = nkf_enc_from_index(EUC_JP);
1853             continue;
1854         case 's':           /* SJIS output */
1855             output_encoding = nkf_enc_from_index(WINDOWS_31J);
1856             continue;
1857         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1858             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1859             input_encoding = nkf_enc_from_index(ISO_8859_1);
1860             continue;
1861         case 'i':           /* Kanji IN ESC-$-@/B */
1862             if (*cp=='@'||*cp=='B')
1863                 kanji_intro = *cp++;
1864             continue;
1865         case 'o':           /* ASCII IN ESC-(-J/B */
1866             if (*cp=='J'||*cp=='B'||*cp=='H')
1867                 ascii_intro = *cp++;
1868             continue;
1869         case 'h':
1870             /*
1871                 bit:1   katakana->hiragana
1872                 bit:2   hiragana->katakana
1873             */
1874             if ('9'>= *cp && *cp>='0')
1875                 hira_f |= (*cp++ -'0');
1876             else
1877                 hira_f |= 1;
1878             continue;
1879         case 'r':
1880             rot_f = TRUE;
1881             continue;
1882 #if defined(MSDOS) || defined(__OS2__)
1883         case 'T':
1884             binmode_f = FALSE;
1885             continue;
1886 #endif
1887 #ifndef PERL_XS
1888         case 'V':
1889             show_configuration();
1890             exit(1);
1891             break;
1892         case 'v':
1893             usage();
1894             exit(1);
1895             break;
1896 #endif
1897 #ifdef UTF8_OUTPUT_ENABLE
1898         case 'w':           /* UTF-8 output */
1899             if (cp[0] == '8') {
1900                 cp++;
1901                 if (cp[0] == '0'){
1902                     cp++;
1903                     output_encoding = nkf_enc_from_index(UTF_8N);
1904                 } else {
1905                     output_bom_f = TRUE;
1906                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1907                 }
1908             } else {
1909                 int enc_idx;
1910                 if ('1'== cp[0] && '6'==cp[1]) {
1911                     cp += 2;
1912                     enc_idx = UTF_16;
1913                 } else if ('3'== cp[0] && '2'==cp[1]) {
1914                     cp += 2;
1915                     enc_idx = UTF_32;
1916                 } else {
1917                     output_encoding = nkf_enc_from_index(UTF_8);
1918                     continue;
1919                 }
1920                 if (cp[0]=='L') {
1921                     cp++;
1922                     output_endian = ENDIAN_LITTLE;
1923                 } else if (cp[0] == 'B') {
1924                     cp++;
1925                 } else {
1926                     output_encoding = nkf_enc_from_index(enc_idx);
1927                     continue;
1928                 }
1929                 if (cp[0] == '0'){
1930                     cp++;
1931                     enc_idx = enc_idx == UTF_16
1932                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1933                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1934                 } else {
1935                     output_bom_f = TRUE;
1936                     enc_idx = enc_idx == UTF_16
1937                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1938                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1939                 }
1940                 output_encoding = nkf_enc_from_index(enc_idx);
1941             }
1942             continue;
1943 #endif
1944 #ifdef UTF8_INPUT_ENABLE
1945         case 'W':           /* UTF input */
1946             if (cp[0] == '8') {
1947                 cp++;
1948                 input_encoding = nkf_enc_from_index(UTF_8);
1949             }else{
1950                 int enc_idx;
1951                 if ('1'== cp[0] && '6'==cp[1]) {
1952                     cp += 2;
1953                     input_endian = ENDIAN_BIG;
1954                     enc_idx = UTF_16;
1955                 } else if ('3'== cp[0] && '2'==cp[1]) {
1956                     cp += 2;
1957                     input_endian = ENDIAN_BIG;
1958                     enc_idx = UTF_32;
1959                 } else {
1960                     input_encoding = nkf_enc_from_index(UTF_8);
1961                     continue;
1962                 }
1963                 if (cp[0]=='L') {
1964                     cp++;
1965                     input_endian = ENDIAN_LITTLE;
1966                 } else if (cp[0] == 'B') {
1967                     cp++;
1968                     input_endian = ENDIAN_BIG;
1969                 }
1970                 enc_idx = enc_idx == UTF_16
1971                     ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1972                     : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1973                 input_encoding = nkf_enc_from_index(enc_idx);
1974             }
1975             continue;
1976 #endif
1977         /* Input code assumption */
1978         case 'J':   /* ISO-2022-JP input */
1979             input_encoding = nkf_enc_from_index(ISO_2022_JP);
1980             continue;
1981         case 'E':   /* EUC-JP input */
1982             input_encoding = nkf_enc_from_index(EUC_JP);
1983             continue;
1984         case 'S':   /* Windows-31J input */
1985             input_encoding = nkf_enc_from_index(WINDOWS_31J);
1986             continue;
1987         case 'Z':   /* Convert X0208 alphabet to asii */
1988             /* alpha_f
1989                bit:0   Convert JIS X 0208 Alphabet to ASCII
1990                bit:1   Convert Kankaku to one space
1991                bit:2   Convert Kankaku to two spaces
1992                bit:3   Convert HTML Entity
1993                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1994             */
1995             while ('0'<= *cp && *cp <='9') {
1996                 alpha_f |= 1 << (*cp++ - '0');
1997             }
1998             if (!alpha_f) alpha_f = 1;
1999             continue;
2000         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2001             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2002             /* accept  X0201
2003                     ESC-(-I     in JIS, EUC, MS Kanji
2004                     SI/SO       in JIS, EUC, MS Kanji
2005                     SSO         in EUC, JIS, not in MS Kanji
2006                     MS Kanji (0xa0-0xdf)
2007                output  X0201
2008                     ESC-(-I     in JIS (0x20-0x5f)
2009                     SSO         in EUC (0xa0-0xdf)
2010                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2011             */
2012             continue;
2013         case 'X':   /* Convert X0201 kana to X0208 */
2014             x0201_f = TRUE;
2015             continue;
2016         case 'F':   /* prserve new lines */
2017             fold_preserve_f = TRUE;
2018         case 'f':   /* folding -f60 or -f */
2019             fold_f = TRUE;
2020             fold_len = 0;
2021             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2022                 fold_len *= 10;
2023                 fold_len += *cp++ - '0';
2024             }
2025             if (!(0<fold_len && fold_len<BUFSIZ))
2026                 fold_len = DEFAULT_FOLD;
2027             if (*cp=='-') {
2028                 fold_margin = 0;
2029                 cp++;
2030                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2031                     fold_margin *= 10;
2032                     fold_margin += *cp++ - '0';
2033                 }
2034             }
2035             continue;
2036         case 'm':   /* MIME support */
2037             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2038             if (*cp=='B'||*cp=='Q') {
2039                 mime_decode_mode = *cp++;
2040                 mimebuf_f = FIXED_MIME;
2041             } else if (*cp=='N') {
2042                 mime_f = TRUE; cp++;
2043             } else if (*cp=='S') {
2044                 mime_f = STRICT_MIME; cp++;
2045             } else if (*cp=='0') {
2046                 mime_decode_f = FALSE;
2047                 mime_f = FALSE; cp++;
2048             }
2049             continue;
2050         case 'M':   /* MIME output */
2051             if (*cp=='B') {
2052                 mimeout_mode = 'B';
2053                 mimeout_f = FIXED_MIME; cp++;
2054             } else if (*cp=='Q') {
2055                 mimeout_mode = 'Q';
2056                 mimeout_f = FIXED_MIME; cp++;
2057             } else {
2058                 mimeout_f = TRUE;
2059             }
2060             continue;
2061         case 'B':   /* Broken JIS support */
2062             /*  bit:0   no ESC JIS
2063                 bit:1   allow any x on ESC-(-x or ESC-$-x
2064                 bit:2   reset to ascii on NL
2065             */
2066             if ('9'>= *cp && *cp>='0')
2067                 broken_f |= 1<<(*cp++ -'0');
2068             else
2069                 broken_f |= TRUE;
2070             continue;
2071 #ifndef PERL_XS
2072         case 'O':/* for Output file */
2073             file_out_f = TRUE;
2074             continue;
2075 #endif
2076         case 'c':/* add cr code */
2077             nlmode_f = CRLF;
2078             continue;
2079         case 'd':/* delete cr code */
2080             nlmode_f = LF;
2081             continue;
2082         case 'I':   /* ISO-2022-JP output */
2083             iso2022jp_f = TRUE;
2084             continue;
2085         case 'L':  /* line mode */
2086             if (*cp=='u') {         /* unix */
2087                 nlmode_f = LF; cp++;
2088             } else if (*cp=='m') { /* mac */
2089                 nlmode_f = CR; cp++;
2090             } else if (*cp=='w') { /* windows */
2091                 nlmode_f = CRLF; cp++;
2092             } else if (*cp=='0') { /* no conversion  */
2093                 nlmode_f = 0; cp++;
2094             }
2095             continue;
2096 #ifndef PERL_XS
2097         case 'g':
2098             if ('2' <= *cp && *cp <= '9') {
2099                 guess_f = 2;
2100                 cp++;
2101             } else if (*cp == '0' || *cp == '1') {
2102                 guess_f = 1;
2103                 cp++;
2104             } else {
2105                 guess_f = 1;
2106             }
2107             continue;
2108 #endif
2109         case SP:
2110         /* module muliple options in a string are allowed for Perl moudle  */
2111             while(*cp && *cp++!='-');
2112             continue;
2113         default:
2114             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2115             /* bogus option but ignored */
2116             continue;
2117         }
2118     }
2119 }
2120
2121 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2122 {
2123     if (iconv_func){
2124         struct input_code *p = input_code_list;
2125         while (p->name){
2126             if (iconv_func == p->iconv_func){
2127                 return p;
2128             }
2129             p++;
2130         }
2131     }
2132     return 0;
2133 }
2134
2135 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2136 {
2137 #ifdef INPUT_CODE_FIX
2138     if (f || !input_encoding)
2139 #endif
2140         if (estab_f != f){
2141             estab_f = f;
2142         }
2143
2144     if (iconv_func
2145 #ifdef INPUT_CODE_FIX
2146         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
2147 #endif
2148         ){
2149         iconv = iconv_func;
2150     }
2151 #ifdef CHECK_OPTION
2152     if (estab_f && iconv_for_check != iconv){
2153         struct input_code *p = find_inputcode_byfunc(iconv);
2154         if (p){
2155             set_input_codename(p->name);
2156             debug(p->name);
2157         }
2158         iconv_for_check = iconv;
2159     }
2160 #endif
2161 }
2162
/*
 * Flag bits that the encoding-guess code ORs into input_code.score
 * (see set_code_score / code_score).  Each flag is a distinct bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* machine-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* non-existent characters */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score flags indexed by the low four bits of the lead byte, used by
 * code_score() when (c2 & 0x70) == 0x20.
 *
 * The element type is unsigned char on purpose: the tables hold values
 * up to SCORE_ERROR (128), which does not fit in a signed char and would
 * turn into a negative number (setting every high bit of the score once
 * widened) on platforms where plain char is signed.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score flags indexed by the low four bits of the lead byte, used by
 * code_score() when (c2 & 0x70) == 0x70.
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2187
2188 void set_code_score(struct input_code *ptr, nkf_char score)
2189 {
2190     if (ptr){
2191         ptr->score |= score;
2192     }
2193 }
2194
2195 void clr_code_score(struct input_code *ptr, nkf_char score)
2196 {
2197     if (ptr){
2198         ptr->score &= ~score;
2199     }
2200 }
2201
2202 void code_score(struct input_code *ptr)
2203 {
2204     nkf_char c2 = ptr->buf[0];
2205 #ifdef UTF8_OUTPUT_ENABLE
2206     nkf_char c1 = ptr->buf[1];
2207 #endif
2208     if (c2 < 0){
2209         set_code_score(ptr, SCORE_ERROR);
2210     }else if (c2 == SSO){
2211         set_code_score(ptr, SCORE_KANA);
2212     }else if (c2 == 0x8f){
2213         set_code_score(ptr, SCORE_X0212);
2214 #ifdef UTF8_OUTPUT_ENABLE
2215     }else if (!e2w_conv(c2, c1)){
2216         set_code_score(ptr, SCORE_NO_EXIST);
2217 #endif
2218     }else if ((c2 & 0x70) == 0x20){
2219         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2220     }else if ((c2 & 0x70) == 0x70){
2221         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2222     }else if ((c2 & 0x70) >= 0x50){
2223         set_code_score(ptr, SCORE_L2);
2224     }
2225 }
2226
2227 void status_disable(struct input_code *ptr)
2228 {
2229     ptr->stat = -1;
2230     ptr->buf[0] = -1;
2231     code_score(ptr);
2232     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2233 }
2234
2235 void status_push_ch(struct input_code *ptr, nkf_char c)
2236 {
2237     ptr->buf[ptr->index++] = c;
2238 }
2239
2240 void status_clear(struct input_code *ptr)
2241 {
2242     ptr->stat = 0;
2243     ptr->index = 0;
2244 }
2245
2246 void status_reset(struct input_code *ptr)
2247 {
2248     status_clear(ptr);
2249     ptr->score = SCORE_INIT;
2250 }
2251
2252 void status_reinit(struct input_code *ptr)
2253 {
2254     status_reset(ptr);
2255     ptr->_file_stat = 0;
2256 }
2257
2258 void status_check(struct input_code *ptr, nkf_char c)
2259 {
2260     if (c <= DEL && estab_f){
2261         status_reset(ptr);
2262     }
2263 }
2264
2265 void s_status(struct input_code *ptr, nkf_char c)
2266 {
2267     switch(ptr->stat){
2268       case -1:
2269           status_check(ptr, c);
2270           break;
2271       case 0:
2272           if (c <= DEL){
2273               break;
2274 #ifdef NUMCHAR_OPTION
2275           }else if (is_unicode_capsule(c)){
2276               break;
2277 #endif
2278           }else if (0xa1 <= c && c <= 0xdf){
2279               status_push_ch(ptr, SSO);
2280               status_push_ch(ptr, c);
2281               code_score(ptr);
2282               status_clear(ptr);
2283           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2284               ptr->stat = 1;
2285               status_push_ch(ptr, c);
2286           }else if (0xed <= c && c <= 0xee){
2287               ptr->stat = 3;
2288               status_push_ch(ptr, c);
2289 #ifdef SHIFTJIS_CP932
2290           }else if (is_ibmext_in_sjis(c)){
2291               ptr->stat = 2;
2292               status_push_ch(ptr, c);
2293 #endif /* SHIFTJIS_CP932 */
2294 #ifdef X0212_ENABLE
2295           }else if (0xf0 <= c && c <= 0xfc){
2296               ptr->stat = 1;
2297               status_push_ch(ptr, c);
2298 #endif /* X0212_ENABLE */
2299           }else{
2300               status_disable(ptr);
2301           }
2302           break;
2303       case 1:
2304           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2305               status_push_ch(ptr, c);
2306               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2307               code_score(ptr);
2308               status_clear(ptr);
2309           }else{
2310               status_disable(ptr);
2311           }
2312           break;
2313       case 2:
2314 #ifdef SHIFTJIS_CP932
2315         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2316             status_push_ch(ptr, c);
2317             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2318                 set_code_score(ptr, SCORE_CP932);
2319                 status_clear(ptr);
2320                 break;
2321             }
2322         }
2323 #endif /* SHIFTJIS_CP932 */
2324         status_disable(ptr);
2325           break;
2326       case 3:
2327           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2328               status_push_ch(ptr, c);
2329               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2330             set_code_score(ptr, SCORE_CP932);
2331             status_clear(ptr);
2332           }else{
2333               status_disable(ptr);
2334           }
2335           break;
2336     }
2337 }
2338
2339 void e_status(struct input_code *ptr, nkf_char c)
2340 {
2341     switch (ptr->stat){
2342       case -1:
2343           status_check(ptr, c);
2344           break;
2345       case 0:
2346           if (c <= DEL){
2347               break;
2348 #ifdef NUMCHAR_OPTION
2349           }else if (is_unicode_capsule(c)){
2350               break;
2351 #endif
2352           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2353               ptr->stat = 1;
2354               status_push_ch(ptr, c);
2355 #ifdef X0212_ENABLE
2356           }else if (0x8f == c){
2357               ptr->stat = 2;
2358               status_push_ch(ptr, c);
2359 #endif /* X0212_ENABLE */
2360           }else{
2361               status_disable(ptr);
2362           }
2363           break;
2364       case 1:
2365           if (0xa1 <= c && c <= 0xfe){
2366               status_push_ch(ptr, c);
2367               code_score(ptr);
2368               status_clear(ptr);
2369           }else{
2370               status_disable(ptr);
2371           }
2372           break;
2373 #ifdef X0212_ENABLE
2374       case 2:
2375           if (0xa1 <= c && c <= 0xfe){
2376               ptr->stat = 1;
2377               status_push_ch(ptr, c);
2378           }else{
2379               status_disable(ptr);
2380           }
2381 #endif /* X0212_ENABLE */
2382     }
2383 }
2384
#ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte to the UTF-8 candidate's state machine.
 *
 * ptr->stat values used here:
 *   -1  disabled; only status_check() is run
 *    0  idle, waiting for a lead byte
 *    1  lead byte was 0xc0-0xdf
 *    2  lead byte was 0xe0-0xef
 *    3  lead byte was 0xf0-0xf4; following bytes are collected but not scored
 *
 * In states 1 and 2 the sequence is converted with w2e_conv() and scored
 * once more than ptr->stat bytes have been stored; the byte triple
 * EF BB BF is converted but not scored.
 * Any byte that does not fit the current state disables the candidate.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    /* bytes accepted after a lead byte: 0x80-0xbf */
    int is_cont = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
    case -1:
        status_check(ptr, c);
        break;

    case 0:
        if (c <= DEL)
            break;
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c))
            break;
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
        }else{
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        break;

    case 1:
    case 2:
        if (!is_cont){
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index > ptr->stat){
            int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                       && ptr->buf[2] == 0xbf);

            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!bom)
                code_score(ptr);
            status_clear(ptr);
        }
        break;

    case 3:
        if (!is_cont){
            status_disable(ptr);
        }else if (ptr->index < ptr->stat){
            status_push_ch(ptr, c);
        }else{
            /* last byte of the sequence: drop it and go back to idle */
            status_clear(ptr);
        }
        break;
    }
}
#endif
2444
2445 void code_status(nkf_char c)
2446 {
2447     int action_flag = 1;
2448     struct input_code *result = 0;
2449     struct input_code *p = input_code_list;
2450     while (p->name){
2451         if (!p->status_func) {
2452             ++p;
2453             continue;
2454         }
2455         if (!p->status_func)
2456             continue;
2457         (p->status_func)(p, c);
2458         if (p->stat > 0){
2459             action_flag = 0;
2460         }else if(p->stat == 0){
2461             if (result){
2462                 action_flag = 0;
2463             }else{
2464                 result = p;
2465             }
2466         }
2467         ++p;
2468     }
2469
2470     if (action_flag){
2471         if (result && !estab_f){
2472             set_iconv(TRUE, result->iconv_func);
2473         }else if (c <= DEL){
2474             struct input_code *ptr = input_code_list;
2475             while (ptr->name){
2476                 status_reset(ptr);
2477                 ++ptr;
2478             }
2479         }
2480     }
2481 }
2482
2483 #ifndef WIN32DLL
2484 nkf_char std_getc(FILE *f)
2485 {
2486     if (std_gc_ndx){
2487         return std_gc_buf[--std_gc_ndx];
2488     }
2489     return getc(f);
2490 }
2491 #endif /*WIN32DLL*/
2492
2493 nkf_char std_ungetc(nkf_char c, FILE *f)
2494 {
2495     if (std_gc_ndx == STD_GC_BUFSIZE){
2496         return EOF;
2497     }
2498     std_gc_buf[std_gc_ndx++] = c;
2499     return c;
2500 }
2501
2502 #ifndef WIN32DLL
2503 void std_putc(nkf_char c)
2504 {
2505     if(c!=EOF)
2506       putchar(c);
2507 }
2508 #endif /*WIN32DLL*/
2509
2510 #if !defined(PERL_XS) && !defined(WIN32DLL)
2511 nkf_char noconvert(FILE *f)
2512 {
2513     nkf_char    c;
2514
2515     if (nop_f == 2)
2516         module_connection();
2517     while ((c = (*i_getc)(f)) != EOF)
2518       (*o_putc)(c);
2519     (*o_putc)(EOF);
2520     return 1;
2521 }
2522 #endif
2523
2524 void module_connection(void)
2525 {
2526     if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
2527     oconv = nkf_enc_to_oconv(output_encoding);
2528     o_putc = std_putc;
2529
2530     /* replace continucation module, from output side */
2531
2532     /* output redicrection */
2533 #ifdef CHECK_OPTION
2534     if (noout_f || guess_f){
2535         o_putc = no_putc;
2536     }
2537 #endif
2538     if (mimeout_f) {
2539         o_mputc = o_putc;
2540         o_putc = mime_putc;
2541         if (mimeout_f == TRUE) {
2542             o_base64conv = oconv; oconv = base64_conv;
2543         }
2544         /* base64_count = 0; */
2545     }
2546
2547     if (nlmode_f || guess_f) {
2548         o_nlconv = oconv; oconv = nl_conv;
2549     }
2550     if (rot_f) {
2551         o_rot_conv = oconv; oconv = rot_conv;
2552     }
2553     if (iso2022jp_f) {
2554         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2555     }
2556     if (hira_f) {
2557         o_hira_conv = oconv; oconv = hira_conv;
2558     }
2559     if (fold_f) {
2560         o_fconv = oconv; oconv = fold_conv;
2561         f_line = 0;
2562     }
2563     if (alpha_f || x0201_f) {
2564         o_zconv = oconv; oconv = z_conv;
2565     }
2566
2567     i_getc = std_getc;
2568     i_ungetc = std_ungetc;
2569     /* input redicrection */
2570 #ifdef INPUT_OPTION
2571     if (cap_f){
2572         i_cgetc = i_getc; i_getc = cap_getc;
2573         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2574     }
2575     if (url_f){
2576         i_ugetc = i_getc; i_getc = url_getc;
2577         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2578     }
2579 #endif
2580 #ifdef NUMCHAR_OPTION
2581     if (numchar_f){
2582         i_ngetc = i_getc; i_getc = numchar_getc;
2583         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2584     }
2585 #endif
2586 #ifdef UNICODE_NORMALIZATION
2587     if (nfc_f){
2588         i_nfc_getc = i_getc; i_getc = nfc_getc;
2589         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2590     }
2591 #endif
2592     if (mime_f && mimebuf_f==FIXED_MIME) {
2593         i_mgetc = i_getc; i_getc = mime_getc;
2594         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2595     }
2596     if (broken_f & 1) {
2597         i_bgetc = i_getc; i_getc = broken_getc;
2598         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2599     }
2600     if (input_encoding) {
2601         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
2602     } else {
2603         set_iconv(FALSE, e_iconv);
2604     }
2605
2606     {
2607         struct input_code *p = input_code_list;
2608         while (p->name){
2609             status_reinit(p++);
2610         }
2611     }
2612 }
2613
2614 /*
2615  * Check and Ignore BOM
      *
      * Reads the first bytes of f through (*i_getc) and compares them with
      * these byte order marks:
      *   00 00 FE FF  -> w_iconv32, ENDIAN_BIG
      *   00 00 FF FE  -> w_iconv32, ENDIAN_2143
      *   EF BB BF     -> w_iconv
      *   FE FF 00 00  -> w_iconv32, ENDIAN_3412
      *   FE FF        -> w_iconv16, ENDIAN_BIG
      *   FF FE 00 00  -> w_iconv32, ENDIAN_LITTLE
      *   FF FE        -> w_iconv16, ENDIAN_LITTLE
      * When no input_encoding is set, the matching converter is installed
      * with set_iconv(TRUE, ...).  The mark is consumed (and input_endian
      * set where applicable) only if iconv then equals that converter;
      * otherwise every byte read is handed back through (*i_ungetc), last
      * byte first, so the input is seen again unchanged.
2616  */
2617 void check_bom(FILE *f)
2618 {
2619     int c2;
2620     switch(c2 = (*i_getc)(f)){
2621     case 0x00:
2622         if((c2 = (*i_getc)(f)) == 0x00){
2623             if((c2 = (*i_getc)(f)) == 0xFE){
2624                 if((c2 = (*i_getc)(f)) == 0xFF){
2625                     if(!input_encoding){
2626                         set_iconv(TRUE, w_iconv32);
2627                     }
2628                     if (iconv == w_iconv32) {
2629                         input_endian = ENDIAN_BIG;
2630                         return;
2631                     }
2632                     (*i_ungetc)(0xFF,f);
2633                 }else (*i_ungetc)(c2,f);
2634                 (*i_ungetc)(0xFE,f);
2635             }else if(c2 == 0xFF){
2636                 if((c2 = (*i_getc)(f)) == 0xFE){
2637                     if(!input_encoding){
2638                         set_iconv(TRUE, w_iconv32);
2639                     }
2640                     if (iconv == w_iconv32) {
2641                         input_endian = ENDIAN_2143;
2642                         return;
2643                     }
                         /* the fourth byte read was 0xFE; hand that byte back */
2644                     (*i_ungetc)(0xFE,f);
2645                 }else (*i_ungetc)(c2,f);
2646                 (*i_ungetc)(0xFF,f);
2647             }else (*i_ungetc)(c2,f);
2648             (*i_ungetc)(0x00,f);
2649         }else (*i_ungetc)(c2,f);
2650         (*i_ungetc)(0x00,f);
2651         break;
2652     case 0xEF:
2653         if((c2 = (*i_getc)(f)) == 0xBB){
2654             if((c2 = (*i_getc)(f)) == 0xBF){
2655                 if(!input_encoding){
2656                     set_iconv(TRUE, w_iconv);
2657                 }
2658                 if (iconv == w_iconv) {
2659                     return;
2660                 }
2661                 (*i_ungetc)(0xBF,f);
2662             }else (*i_ungetc)(c2,f);
2663             (*i_ungetc)(0xBB,f);
2664         }else (*i_ungetc)(c2,f);
2665         (*i_ungetc)(0xEF,f);
2666         break;
2667     case 0xFE:
2668         if((c2 = (*i_getc)(f)) == 0xFF){
             /* FE FF 00 00 is tried as UTF-32 first, then FE FF as UTF-16 */
2669             if((c2 = (*i_getc)(f)) == 0x00){
2670                 if((c2 = (*i_getc)(f)) == 0x00){
2671                     if(!input_encoding){
2672                         set_iconv(TRUE, w_iconv32);
2673                     }
2674                     if (iconv == w_iconv32) {
2675                         input_endian = ENDIAN_3412;
2676                         return;
2677                     }
2678                     (*i_ungetc)(0x00,f);
2679                 }else (*i_ungetc)(c2,f);
2680                 (*i_ungetc)(0x00,f);
2681             }else (*i_ungetc)(c2,f);
2682             if(!input_encoding){
2683                 set_iconv(TRUE, w_iconv16);
2684             }
2685             if (iconv == w_iconv16) {
2686                 input_endian = ENDIAN_BIG;
2687                 return;
2688             }
2689             (*i_ungetc)(0xFF,f);
2690         }else (*i_ungetc)(c2,f);
2691         (*i_ungetc)(0xFE,f);
2692         break;
2693     case 0xFF:
2694         if((c2 = (*i_getc)(f)) == 0xFE){
             /* FF FE 00 00 is tried as UTF-32 first, then FF FE as UTF-16 */
2695             if((c2 = (*i_getc)(f)) == 0x00){
2696                 if((c2 = (*i_getc)(f)) == 0x00){
2697                     if(!input_encoding){
2698                         set_iconv(TRUE, w_iconv32);
2699                     }
2700                     if (iconv == w_iconv32) {
2701                         input_endian = ENDIAN_LITTLE;
2702                         return;
2703                     }
2704                     (*i_ungetc)(0x00,f);
2705                 }else (*i_ungetc)(c2,f);
2706                 (*i_ungetc)(0x00,f);
2707             }else (*i_ungetc)(c2,f);
2708             if(!input_encoding){
2709                 set_iconv(TRUE, w_iconv16);
2710             }
2711             if (iconv == w_iconv16) {
2712                 input_endian = ENDIAN_LITTLE;
2713                 return;
2714             }
2715             (*i_ungetc)(0xFE,f);
2716         }else (*i_ungetc)(c2,f);
2717         (*i_ungetc)(0xFF,f);
2718         break;
2719     default:
         /* no mark recognised: hand the single byte back */
2720         (*i_ungetc)(c2,f);
2721         break;
2722     }
2723 }
2724
2725 /*
2726    Conversion main loop. Code detection only.

        Reads f byte by byte through (*i_getc).  Each byte is classified
        (second byte of a pending pair, UTF-16/UTF-32 unit, 8-bit lead byte,
        printable 7-bit byte, SI/SO, ESC sequence, CR/LF, ...) and then either
        remembered in c2, swallowed, or handed to the "send" switch near the
        bottom of the loop, which calls (*iconv) or (*oconv) depending on
        input_mode.

        Inside the loop NEXT skips the send switch and reads the next byte,
        SEND falls through to the send switch, and LAST leaves the loop.

        After the loop (*iconv)(EOF, 0, 0) is called and 1 is returned.
2727  */
2728
2729 nkf_char kanji_convert(FILE *f)
2730 {
2731     nkf_char    c3, c2=0, c1, c0=0;
2732     int is_8bit = FALSE;
2733
         /* is_8bit switches off the SI / SO / ESC handling below
            (unless mime_decode_mode is set) */
2734     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
2735         is_8bit = TRUE;
2736     }
2737
2738     input_mode = ASCII;
2739     output_mode = ASCII;
2740     shift_mode = FALSE;
2741
2742 #define NEXT continue      /* no output, get next */
2743 #define SEND ;             /* output c1 and c2, get next */
2744 #define LAST break         /* end of loop, go closing  */
2745
2746     module_connection();
2747     check_bom(f);
2748
2749     while ((c1 = (*i_getc)(f)) != EOF) {
             /* with INPUT_CODE_FIX, code_status() is skipped when an input
                encoding was given */
2750 #ifdef INPUT_CODE_FIX
2751         if (!input_encoding)
2752 #endif
2753             code_status(c1);
2754         if (c2) {
2755             /* second byte */
                 /* the 8-bit threshold is 0x92 for a CP5022x input encoding,
                    DEL otherwise */
2756             if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2757                 /* in case of 8th bit is on */
2758                 if (!estab_f&&!mime_decode_mode) {
2759                     /* in case of not established yet */
2760                     /* It is still ambiguous */
2761                     if (h_conv(f, c2, c1)==EOF)
2762                         LAST;
2763                     else
2764                         c2 = 0;
2765                     NEXT;
2766                 } else {
2767                     /* in case of already established */
2768                     if (c1 < AT) {
2769                         /* ignore bogus code and not CP5022x UCD */
2770                         c2 = 0;
2771                         NEXT;
2772                     } else {
2773                         SEND;
2774                     }
2775                 }
2776             } else
2777                 /* second byte, 7 bit code */
2778                 /* it might be kanji shifted */
2779                 if ((c1 == DEL) || (c1 <= SP)) {
2780                     /* ignore bogus first code */
2781                     c2 = 0;
2782                     NEXT;
2783                 } else
2784                     SEND;
2785         } else {
2786             /* first byte */
2787 #ifdef UTF8_INPUT_ENABLE
2788             if (iconv == w_iconv16) {
                     /* UTF-16: c2 receives the high byte and c1 the low byte of
                        one unit; when the high byte is 0xD8..0xDB the following
                        unit is read as well and packed into c0.  EOF in the
                        middle sets c2 = EOF. */
2789                 if (input_endian == ENDIAN_BIG) {
2790                     c2 = c1;
2791                     if ((c1 = (*i_getc)(f)) != EOF) {
2792                         if (0xD8 <= c2 && c2 <= 0xDB) {
2793                             if ((c0 = (*i_getc)(f)) != EOF) {
2794                                 c0 <<= 8;
2795                                 if ((c3 = (*i_getc)(f)) != EOF) {
2796                                     c0 |= c3;
2797                                 } else c2 = EOF;
2798                             } else c2 = EOF;
2799                         }
2800                     } else c2 = EOF;
2801                 } else {
2802                     if ((c2 = (*i_getc)(f)) != EOF) {
2803                         if (0xD8 <= c2 && c2 <= 0xDB) {
2804                             if ((c3 = (*i_getc)(f)) != EOF) {
2805                                 if ((c0 = (*i_getc)(f)) != EOF) {
2806                                     c0 <<= 8;
2807                                     c0 |= c3;
2808                                 } else c2 = EOF;
2809                             } else c2 = EOF;
2810                         }
2811                     } else c2 = EOF;
2812                 }
2813                 SEND;
2814             } else if(iconv == w_iconv32){
                     /* UTF-32: this c3 shadows the outer c3 and keeps the first
                        of the four bytes; the bytes are combined into c1
                        according to input_endian and c2 is cleared */
2815                 int c3 = c1;
2816                 if((c2 = (*i_getc)(f)) != EOF &&
2817                    (c1 = (*i_getc)(f)) != EOF &&
2818                    (c0 = (*i_getc)(f)) != EOF){
2819                     switch(input_endian){
2820                     case ENDIAN_BIG:
2821                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2822                         break;
2823                     case ENDIAN_LITTLE:
2824                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2825                         break;
2826                     case ENDIAN_2143:
2827                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2828                         break;
2829                     case ENDIAN_3412:
2830                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2831                         break;
2832                     }
2833                     c2 = 0;
2834                 }else{
2835                     c2 = EOF;
2836                 }
2837                 SEND;
2838             } else
2839 #endif
2840 #ifdef NUMCHAR_OPTION
2841             if (is_unicode_capsule(c1)){
2842                 SEND;
2843             } else
2844 #endif
2845             if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2846                 /* 8 bit code */
2847                 if (!estab_f && !iso8859_f) {
2848                     /* not established yet */
2849                     c2 = c1;
2850                     NEXT;
2851                 } else { /* estab_f==TRUE */
2852                     if (iso8859_f) {
2853                         c2 = ISO_8859_1;
2854                         c1 &= 0x7f;
2855                         SEND;
2856                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2857                         /* SJIS X0201 Case... */
2858                         if (iso2022jp_f && !x0201_f) {
2859                             (*oconv)(GETA1, GETA2);
2860                             NEXT;
2861                         } else {
2862                             c2 = JIS_X_0201;
2863                             c1 &= 0x7f;
2864                             SEND;
2865                         }
2866                     } else if (c1==SSO && iconv != s_iconv) {
2867                         /* EUC X0201 Case */
2868                         c1 = (*i_getc)(f);  /* skip SSO */
2869                         code_status(c1);
2870                         if (SSP<=c1 && c1<0xe0) {
2871                             if (iso2022jp_f && !x0201_f) {
2872                                 (*oconv)(GETA1, GETA2);
2873                                 NEXT;
2874                             } else {
2875                                 c2 = JIS_X_0201;
2876                                 c1 &= 0x7f;
2877                                 SEND;
2878                             }
2879                         } else  { /* bogus code, skip SSO and one byte */
2880                             NEXT;
2881                         }
2882                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2883                                (c1 == 0xFD || c1 == 0xFE)) {
2884                         /* CP10001 */
2885                         c2 = JIS_X_0201;
2886                         c1 &= 0x7f;
2887                         SEND;
2888                     } else {
2889                        /* already established */
2890                        c2 = c1;
2891                        NEXT;
2892                     }
2893                 }
2894             } else if ((c1 > SP) && (c1 != DEL)) {
2895                 /* in case of Roman characters */
2896                 if (shift_mode) {
2897                     /* output 1 shifted byte */
2898                     if (iso8859_f) {
2899                         c2 = ISO_8859_1;
2900                         SEND;
2901                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2902                       /* output 1 shifted byte */
2903                         if (iso2022jp_f && !x0201_f) {
2904                             (*oconv)(GETA1, GETA2);
2905                             NEXT;
2906                         } else {
2907                             c2 = JIS_X_0201;
2908                             SEND;
2909                         }
2910                     } else {
2911                         /* look like bogus code */
2912                         NEXT;
2913                     }
2914                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2915                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2916                     /* in case of Kanji shifted */
2917                     c2 = c1;
2918                     NEXT;
2919                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2920                     /* Check MIME code */
2921                     if ((c1 = (*i_getc)(f)) == EOF) {
2922                         (*oconv)(0, '=');
2923                         LAST;
2924                     } else if (c1 == '?') {
2925                         /* =? is mime conversion start sequence */
2926                         if(mime_f == STRICT_MIME) {
2927                             /* check in real detail */
2928                             if (mime_begin_strict(f) == EOF)
2929                                 LAST;
2930                             else
2931                                 NEXT;
2932                         } else if (mime_begin(f) == EOF)
2933                             LAST;
2934                         else
2935                             NEXT;
2936                     } else {
2937                         (*oconv)(0, '=');
2938                         (*i_ungetc)(c1,f);
2939                         NEXT;
2940                     }
2941                 } else {
2942                     /* normal ASCII code */
2943                     SEND;
2944                 }
2945             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2946                 shift_mode = FALSE;
2947                 NEXT;
2948             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2949                 shift_mode = TRUE;
2950                 NEXT;
2951             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* ESC sequences: ESC $ ..., ESC ( ..., ESC N / ESC n;
                        anything else is output as a lonely ESC */
2952                 if ((c1 = (*i_getc)(f)) == EOF) {
2953                     /*  (*oconv)(0, ESC); don't send bogus code */
2954                     LAST;
2955                 } else if (c1 == '$') {
2956                     if ((c1 = (*i_getc)(f)) == EOF) {
2957                         /*
2958                         (*oconv)(0, ESC); don't send bogus code
2959                         (*oconv)(0, '$'); */
2960                         LAST;
2961                     } else if (c1 == '@'|| c1 == 'B') {
2962                         /* This is kanji introduction */
2963                         input_mode = JIS_X_0208;
2964                         shift_mode = FALSE;
2965                         set_input_codename("ISO-2022-JP");
2966 #ifdef CHECK_OPTION
2967                         debug("ISO-2022-JP");
2968 #endif
2969                         NEXT;
2970                     } else if (c1 == '(') {
2971                         if ((c1 = (*i_getc)(f)) == EOF) {
2972                             /* don't send bogus code
2973                             (*oconv)(0, ESC);
2974                             (*oconv)(0, '$');
2975                             (*oconv)(0, '(');
2976                                 */
2977                             LAST;
2978                         } else if (c1 == '@'|| c1 == 'B') {
2979                             /* This is kanji introduction */
2980                             input_mode = JIS_X_0208;
2981                             shift_mode = FALSE;
2982                             NEXT;
2983 #ifdef X0212_ENABLE
2984                         } else if (c1 == 'D'){
2985                             input_mode = JIS_X_0212;
2986                             shift_mode = FALSE;
2987                             NEXT;
2988 #endif /* X0212_ENABLE */
2989                         } else if (c1 == 0x4F){
2990                             input_mode = JIS_X_0213_1;
2991                             shift_mode = FALSE;
2992                             NEXT;
2993                         } else if (c1 == 0x50){
2994                             input_mode = JIS_X_0213_2;
2995                             shift_mode = FALSE;
2996                             NEXT;
2997                         } else {
2998                             /* could be some special code */
2999                             (*oconv)(0, ESC);
3000                             (*oconv)(0, '$');
3001                             (*oconv)(0, '(');
3002                             (*oconv)(0, c1);
3003                             NEXT;
3004                         }
3005                     } else if (broken_f&0x2) {
3006                         /* accept any ESC-$-x as broken code and treat it as JIS X 0208 ... */
3007                         input_mode = JIS_X_0208;
3008                         shift_mode = FALSE;
3009                         NEXT;
3010                     } else {
3011                         (*oconv)(0, ESC);
3012                         (*oconv)(0, '$');
3013                         (*oconv)(0, c1);
3014                         NEXT;
3015                     }
3016                 } else if (c1 == '(') {
3017                     if ((c1 = (*i_getc)(f)) == EOF) {
3018                         /* don't send bogus code
3019                         (*oconv)(0, ESC);
3020                         (*oconv)(0, '('); */
3021                         LAST;
3022                     } else {
3023                         if (c1 == 'I') {
3024                             /* This is X0201 kana introduction */
3025                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3026                             NEXT;
3027                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3028                             /* ESC ( B / J / H: leave kanji/kana mode, input_mode goes back to ASCII */
3029                             input_mode = ASCII; shift_mode = FALSE;
3030                             NEXT;
3031                         } else if (broken_f&0x2) {
3032                             input_mode = ASCII; shift_mode = FALSE;
3033                             NEXT;
3034                         } else {
3035                             (*oconv)(0, ESC);
3036                             (*oconv)(0, '(');
3037                             /* maintain various input_mode here */
3038                             SEND;
3039                         }
3040                     }
3041                } else if ( c1 == 'N' || c1 == 'n'){
3042                    /* SS2 */
                    /* NOTE(review): c3 is not tested against EOF before it may be
                       handed to (*i_ungetc) below -- confirm that is harmless */
3043                    c3 = (*i_getc)(f);  /* skip SS2 */
3044                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3045                        c1 = c3;
3046                        c2 = JIS_X_0201;
3047                        SEND;
3048                    }else{
3049                        (*i_ungetc)(c3, f);
3050                        /* lonely ESC  */
3051                        (*oconv)(0, ESC);
3052                        SEND;
3053                    }
3054                 } else {
3055                     /* lonely ESC  */
3056                     (*oconv)(0, ESC);
3057                     SEND;
3058                 }
3059             } else if (c1 == ESC && iconv == s_iconv) {
3060                 /* ESC in Shift_JIS */
3061                 if ((c1 = (*i_getc)(f)) == EOF) {
3062                     /*  (*oconv)(0, ESC); don't send bogus code */
3063                     LAST;
3064                 } else if (c1 == '$') {
3065                     /* J-PHONE emoji */
3066                     if ((c1 = (*i_getc)(f)) == EOF) {
3067                         /*
3068                            (*oconv)(0, ESC); don't send bogus code
3069                            (*oconv)(0, '$'); */
3070                         LAST;
3071                     } else {
3072                         if (('E' <= c1 && c1 <= 'G') ||
3073                             ('O' <= c1 && c1 <= 'Q')) {
3074                             /*
3075                                NUM : 0 1 2 3 4 5
3076                                BYTE: G E F O P Q
3077                                C%7 : 1 6 0 2 3 4
3078                                C%7 : 0 1 2 3 4 5 6
3079                                NUM : 2 0 3 4 5 X 1
3080                              */
3081                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3082                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
                                 /* every following byte in SP..'z' is output as c1 + c0 */
3083                             while ((c1 = (*i_getc)(f)) != EOF) {
3084                                 if (SP <= c1 && c1 <= 'z') {
3085                                     (*oconv)(0, c1 + c0);
3086                                 } else break; /* c1 == SO */
3087                             }
3088                         }
3089                     }
3090                     if (c1 == EOF) LAST;
3091                     NEXT;
3092                 } else {
3093                     /* lonely ESC  */
3094                     (*oconv)(0, ESC);
3095                     SEND;
3096                 }
3097             } else if (c1 == LF || c1 == CR) {
3098                 if (broken_f&4) {
3099                     input_mode = ASCII; set_iconv(FALSE, 0);
3100                     SEND;
3101                 } else if (mime_decode_f && !mime_decode_mode){
                         /* a line break followed by SP is dropped (the SP is pushed
                            back and the loop continues); otherwise the line break
                            is sent */
3102                     if (c1 == LF) {
3103                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3104                             i_ungetc(SP,f);
3105                             continue;
3106                         } else {
3107                             i_ungetc(c1,f);
3108                         }
3109                         c1 = LF;
3110                         SEND;
3111                     } else  { /* if (c1 == CR)*/
3112                         if ((c1=(*i_getc)(f))!=EOF) {
3113                             if (c1==SP) {
3114                                 i_ungetc(SP,f);
3115                                 continue;
3116                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3117                                 i_ungetc(SP,f);
3118                                 continue;
3119                             } else {
3120                                 i_ungetc(c1,f);
3121                             }
                                 /* NOTE(review): this LF is pushed back even when the
                                    byte read after CR was not LF, which would add an
                                    LF that was not in the input -- confirm intended */
3122                             i_ungetc(LF,f);
3123                         } else {
3124                             i_ungetc(c1,f);
3125                         }
3126                         c1 = CR;
3127                         SEND;
3128                     }
3129                 }
3130             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3131                 /* CP5022x */
3132                 c2 = c1;
3133                 NEXT;
3134             } else
3135                 SEND;
3136         }
3137         /* send: */
3138         switch(input_mode){
3139         case ASCII:
                 /* a return of -1 or -2 from (*iconv) asks for one or two more
                    bytes, which are read into c0 before (*iconv) is called again */
3140             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3141             case -2:
3142                 /* 4 bytes UTF-8 */
3143                 if ((c0 = (*i_getc)(f)) != EOF) {
3144                     code_status(c0);
3145                     c0 <<= 8;
3146                     if ((c3 = (*i_getc)(f)) != EOF) {
3147                         code_status(c3);
3148                         (*iconv)(c2, c1, c0|c3);
3149                     }
3150                 }
3151                 break;
3152             case -1:
3153                 /* 3 bytes EUC or UTF-8 */
3154                 if ((c0 = (*i_getc)(f)) != EOF) {
3155                     code_status(c0);
3156                     (*iconv)(c2, c1, c0);
3157                 }
3158                 break;
3159             }
3160             break;
3161         case JIS_X_0208:
3162         case JIS_X_0213_1:
3163             if (ms_ucs_map_f &&
3164                 0x7F <= c2 && c2 <= 0x92 &&
3165                 0x21 <= c1 && c1 <= 0x7E) {
3166                 /* CP932 UDC */
3167                 if(c1 == 0x7F) return 0; /* never true here: the enclosing test requires c1 <= 0x7E */
3168                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3169                 c2 = 0;
3170             }
3171             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3172             break;
3173 #ifdef X0212_ENABLE
3174         case JIS_X_0212:
3175             (*oconv)(PREFIX_EUCG3 | c2, c1);
3176             break;
3177 #endif /* X0212_ENABLE */
3178         case JIS_X_0213_2:
3179             (*oconv)(PREFIX_EUCG3 | c2, c1);
3180             break;
3181         default:
3182             (*oconv)(input_mode, c1);  /* other special case */
3183         }
3184
3185         c2 = 0;
3186         c0 = 0;
3187         continue;
3188         /* goto next_word */
3189     }
3190
3191     /* epilogue */
3192     (*iconv)(EOF, 0, 0);
         /* no codename recorded yet: when is_8bit is set, report the
            input_code_list entry with the lowest score */
3193     if (!input_codename)
3194     {
3195         if (is_8bit) {
3196             struct input_code *p = input_code_list;
3197             struct input_code *result = p;
3198             while (p->name){
3199                 if (p->score < result->score) result = p;
3200                 ++p;
3201             }
3202             set_input_codename(result->name);
3203 #ifdef CHECK_OPTION
3204             debug(result->name);
3205 #endif
3206         }
3207     }
3208     return 1;
3209 }
3210
3211 nkf_char
3212 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3213 {
3214     nkf_char ret, c3, c0;
3215     int hold_index;
3216
3217
3218     /** it must NOT be in the kanji shifte sequence      */
3219     /** it must NOT be written in JIS7                   */
3220     /** and it must be after 2 byte 8bit code            */
3221
3222     hold_count = 0;
3223     push_hold_buf(c2);
3224     push_hold_buf(c1);
3225
3226     while ((c1 = (*i_getc)(f)) != EOF) {
3227         if (c1 == ESC){
3228             (*i_ungetc)(c1,f);
3229             break;
3230         }
3231         code_status(c1);
3232         if (push_hold_buf(c1) == EOF || estab_f){
3233             break;
3234         }
3235     }
3236
3237     if (!estab_f){
3238         struct input_code *p = input_code_list;
3239         struct input_code *result = p;
3240         if (c1 == EOF){
3241             code_status(c1);
3242         }
3243         while (p->name){
3244             if (p->status_func && p->score < result->score){
3245                 result = p;
3246             }
3247             ++p;
3248         }
3249         set_iconv(TRUE, result->iconv_func);
3250     }
3251
3252
3253     /** now,
3254      ** 1) EOF is detected, or
3255      ** 2) Code is established, or
3256      ** 3) Buffer is FULL (but last word is pushed)
3257      **
3258      ** in 1) and 3) cases, we continue to use
3259      ** Kanji codes by oconv and leave estab_f unchanged.
3260      **/
3261
3262     ret = c1;
3263     hold_index = 0;
3264     while (hold_index < hold_count){
3265         c2 = hold_buf[hold_index++];
3266         if (c2 <= DEL
3267 #ifdef NUMCHAR_OPTION
3268             || is_unicode_capsule(c2)
3269 #endif
3270             ){
3271             (*iconv)(0, c2, 0);
3272             continue;
3273         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3274             (*iconv)(JIS_X_0201, c2, 0);
3275             continue;
3276         }
3277         if (hold_index < hold_count){
3278             c1 = hold_buf[hold_index++];
3279         }else{
3280             c1 = (*i_getc)(f);
3281             if (c1 == EOF){
3282                 c3 = EOF;
3283                 break;
3284             }
3285             code_status(c1);
3286         }
3287         c0 = 0;
3288         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3289         case -2:
3290             /* 4 bytes UTF-8 */
3291             if (hold_index < hold_count){
3292                 c0 = hold_buf[hold_index++];
3293             } else if ((c0 = (*i_getc)(f)) == EOF) {
3294                 ret = EOF;
3295                 break;
3296             } else {
3297                 code_status(c0);
3298                 c0 <<= 8;
3299                 if (hold_index < hold_count){
3300                     c3 = hold_buf[hold_index++];
3301                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3302                     c0 = ret = EOF;
3303                     break;
3304                 } else {
3305                     code_status(c3);
3306                     (*iconv)(c2, c1, c0|c3);
3307                 }
3308             }
3309             break;
3310         case -1:
3311             /* 3 bytes EUC or UTF-8 */
3312             if (hold_index < hold_count){
3313                 c0 = hold_buf[hold_index++];
3314             } else if ((c0 = (*i_getc)(f)) == EOF) {
3315                 ret = EOF;
3316                 break;
3317             } else {
3318                 code_status(c0);
3319             }
3320             (*iconv)(c2, c1, c0);
3321             break;
3322         }
3323         if (c0 == EOF) break;
3324     }
3325     return ret;
3326 }
3327
3328 nkf_char push_hold_buf(nkf_char c2)
3329 {
         /*
          * Append c2 (converted to unsigned char) to hold_buf.
          * Returns EOF when the buffer was already full (nothing is stored)
          * or has just become full; otherwise returns the new hold_count.
          */
         if (hold_count < HOLD_SIZE*2) {
             hold_buf[hold_count] = (unsigned char)c2;
             hold_count++;
             if (hold_count < HOLD_SIZE*2) {
                 return hold_count;
             }
         }
         return EOF;
3334 }
3335
     /*
      * Convert the Shift_JIS byte pair (c2, c1) and store the result through
      * p2 / p1 (either pointer may be NULL).  Always returns 0.
      *
      * Steps, in order:
      *  - SHIFTJIS_CP932: remap through shiftjis_cp932 (when cp932inv_f is
      *    clear) or cp932inv (when it is set); a zero table entry leaves the
      *    pair untouched.
      *  - X0212_ENABLE: when x0213_f is clear, remap through shiftjis_x0212;
      *    a non-zero entry is stored and returned immediately, with
      *    PREFIX_EUCG3 added to c2 for values above 0x7FFF.
      *  - c2 >= 0x80: arithmetic conversion of both bytes; with x0213_f set
      *    and c2 >= 0xF0 the result is marked with PREFIX_EUCG3.
      *  - X0212_ENABLE: c2 is passed through x0212_unshift().
      *
      * NOTE(review): the table lookups index with c1 - 0x40 without a range
      * check in this function; presumably callers only pass 0x40 <= c1 <= 0xFC
      * -- confirm against the table dimensions.
      */
3336 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3337 {
3338 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3339     nkf_char val;
3340 #endif
         /* indexed by [c2 - 0xF0][0x9E < c1]; see the k-list comment below */
3341     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3342 #ifdef SHIFTJIS_CP932
3343     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3344         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3345         if (val){
3346             c2 = val >> 8;
3347             c1 = val & 0xff;
3348         }
3349     }
3350     if (cp932inv_f
3351         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3352         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3353         if (c){
3354             c2 = c >> 8;
3355             c1 = c & 0xff;
3356         }
3357     }
3358 #endif /* SHIFTJIS_CP932 */
3359 #ifdef X0212_ENABLE
3360     if (!x0213_f && is_ibmext_in_sjis(c2)){
3361         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3362         if (val){
                 /* values above 0x7FFF get the PREFIX_EUCG3 mark */
3363             if (val > 0x7FFF){
3364                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3365                 c1 = val & 0xff;
3366             }else{
3367                 c2 = val >> 8;
3368                 c1 = val & 0xff;
3369             }
3370             if (p2) *p2 = c2;
3371             if (p1) *p1 = c1;
3372             return 0;
3373         }
3374     }
3375 #endif
3376     if(c2 >= 0x80){
3377         if(x0213_f && c2 >= 0xF0){
3378             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3379                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3380             }else{ /* 78<=k<=94 */
3381                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3382                 if (0x9E < c1) c2++;
3383             }
3384         }else{
                 /* each lead byte covers two result rows; c1 > 0x9E selects the second */
3385             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3386             if (0x9E < c1) c2++;
3387         }
3388         if (c1 < 0x9F)
3389             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3390         else {
3391             c1 = c1 - 0x7E;
3392         }
3393     }
3394
3395 #ifdef X0212_ENABLE
3396     c2 = x0212_unshift(c2);
3397 #endif
3398     if (p2) *p2 = c2;
3399     if (p1) *p1 = c1;
3400     return 0;
3401 }
3402
3403 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3404 {
         /*
          * Shift_JIS input converter: turns (c2, c1) into the pair handed to
          * (*oconv).  c0 is not used.
          *  - c2 == JIS_X_0201: only the low 7 bits of c1 are kept.
          *  - c2 is EOF, 0 or below SP: the pair is passed on unchanged.
          *  - CP932 user-defined range (x0213_f clear, c2 in 0xF0..0xF9,
          *    c1 in 0x40..0xFC): c1 becomes a CLASS_UNICODE value based at
          *    0xE000 and c2 becomes 0; c1 == 0x7F is dropped without output.
          *  - anything else goes through s2e_conv(); a non-zero result is
          *    returned without output.
          * Returns 0 otherwise.
          */
         int low_or_eof = (c2 == EOF) || (c2 == 0) || (c2 < SP);

         if (c2 == JIS_X_0201) {
             c1 &= 0x7f;
         } else if (!low_or_eof) {
             int udc_lead = !x0213_f && 0xF0 <= c2 && c2 <= 0xF9;
             int udc_trail = 0x40 <= c1 && c1 <= 0xFC;

             if (udc_lead && udc_trail) {
                 /* CP932 UDC */
                 nkf_char cell = c1 - 0x40;

                 if (c1 == 0x7F) return 0;
                 /* trail bytes above 0x7E sit one position lower (0x7F is skipped) */
                 if (0x7E < c1) cell--;
                 c1 = (c2 - 0xF0) * 188 + cell + 0xE000 + CLASS_UNICODE;
                 c2 = 0;
             } else {
                 nkf_char status = s2e_conv(c2, c1, &c2, &c1);

                 if (status != 0) return status;
             }
         }
3418     (*oconv)(c2, c1);
3419     return 0;
3420 }
3421
3422 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3423 {
3424     if (c2 == JIS_X_0201) {
3425         c1 &= 0x7f;
3426 #ifdef X0212_ENABLE
3427     }else if (c2 == 0x8f){
3428         if (c0 == 0){
3429             return -1;
3430         }
3431         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3432             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3433             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3434             c2 = 0;
3435         } else {
3436             c2 = (c2 << 8) | (c1 & 0x7f);
3437             c1 = c0 & 0x7f;
3438 #ifdef SHIFTJIS_CP932
3439             if (cp51932_f){
3440                 nkf_char s2, s1;
3441                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3442                     s2e_conv(s2, s1, &c2, &c1);
3443                     if (c2 < 0x100){
3444                         c1 &= 0x7f;
3445                         c2 &= 0x7f;
3446                     }
3447                 }
3448             }
3449 #endif /* SHIFTJIS_CP932 */
3450         }
3451 #endif /* X0212_ENABLE */
3452     } else if (c2 == SSO){
3453         c2 = JIS_X_0201;
3454         c1 &= 0x7f;
3455     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3456         /* NOP */
3457     } else {
3458         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3459             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3460             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3461             c2 = 0;
3462         } else {
3463             c1 &= 0x7f;
3464             c2 &= 0x7f;
3465 #ifdef SHIFTJIS_CP932
3466             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3467                 nkf_char s2, s1;
3468                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3469                     s2e_conv(s2, s1, &c2, &c1);
3470                     if (c2 < 0x100){
3471                         c1 &= 0x7f;
3472                         c2 &= 0x7f;
3473                     }
3474                 }
3475             }
3476 #endif /* SHIFTJIS_CP932 */
3477         }
3478     }
3479     (*oconv)(c2, c1);
3480     return 0;
3481 }
3482
3483 #ifdef UTF8_INPUT_ENABLE
3484 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3485 {
3486     nkf_char ret = 0;
3487
3488     if (!c1){
3489         *p2 = 0;
3490         *p1 = c2;
3491     }else if (0xc0 <= c2 && c2 <= 0xef) {
3492         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3493 #ifdef NUMCHAR_OPTION
3494         if (ret > 0){
3495             if (p2) *p2 = 0;
3496             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3497             ret = 0;
3498         }
3499 #endif
3500     }
3501     return ret;
3502 }
3503
3504 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3505 {
3506     nkf_char ret = 0;
3507     static const char w_iconv_utf8_1st_byte[] =
3508     { /* 0xC0 - 0xFF */
3509         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3510         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3511         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3512         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3513
3514     if (c2 < 0 || 0xff < c2) {
3515     }else if (c2 == 0) { /* 0 : 1 byte*/
3516         c0 = 0;
3517     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3518         return 0;
3519     } else{
3520         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3521         case 21:
3522             if (c1 < 0x80 || 0xBF < c1) return 0;
3523             break;
3524         case 30:
3525             if (c0 == 0) return -1;
3526             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3527                 return 0;
3528             break;
3529         case 31:
3530         case 33:
3531             if (c0 == 0) return -1;
3532             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3533                 return 0;
3534             break;
3535         case 32:
3536             if (c0 == 0) return -1;
3537             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3538                 return 0;
3539             break;
3540         case 40:
3541             if (c0 == 0) return -2;
3542             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3543                 return 0;
3544             break;
3545         case 41:
3546             if (c0 == 0) return -2;
3547             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3548                 return 0;
3549             break;
3550         case 42:
3551             if (c0 == 0) return -2;
3552             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3553                 return 0;
3554             break;
3555         default:
3556             return 0;
3557             break;
3558         }
3559     }
3560     if (c2 == 0 || c2 == EOF){
3561     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3562         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3563         c2 = 0;
3564     } else {
3565         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3566     }
3567     if (ret == 0){
3568         (*oconv)(c2, c1);
3569     }
3570     return ret;
3571 }
3572 #endif
3573
3574 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3575 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3576 {
3577     val &= VALUE_MASK;
3578     if (val < 0x80){
3579         *p2 = val;
3580         *p1 = 0;
3581         *p0 = 0;
3582     }else if (val < 0x800){
3583         *p2 = 0xc0 | (val >> 6);
3584         *p1 = 0x80 | (val & 0x3f);
3585         *p0 = 0;
3586     } else if (val <= NKF_INT32_C(0xFFFF)) {
3587         *p2 = 0xe0 | (val >> 12);
3588         *p1 = 0x80 | ((val >> 6) & 0x3f);
3589         *p0 = 0x80 | (val        & 0x3f);
3590     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3591         *p2 = 0xe0 |  (val >> 16);
3592         *p1 = 0x80 | ((val >> 12) & 0x3f);
3593         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3594     } else {
3595         *p2 = 0;
3596         *p1 = 0;
3597         *p0 = 0;
3598     }
3599 }
3600 #endif
3601
3602 #ifdef UTF8_INPUT_ENABLE
3603 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3604 {
3605     nkf_char val;
3606     if (c2 >= 0xf8) {
3607         val = -1;
3608     } else if (c2 >= 0xf0){
3609         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3610         val = (c2 & 0x0f) << 18;
3611         val |= (c1 & 0x3f) << 12;
3612         val |= (c0 & 0x3f00) >> 2;
3613         val |= (c0 & 0x3f);
3614     }else if (c2 >= 0xe0){
3615         val = (c2 & 0x0f) << 12;
3616         val |= (c1 & 0x3f) << 6;
3617         val |= (c0 & 0x3f);
3618     }else if (c2 >= 0xc0){
3619         val = (c2 & 0x1f) << 6;
3620         val |= (c1 & 0x3f);
3621     }else{
3622         val = c2;
3623     }
3624     return val;
3625 }
3626
3627 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3628 {
3629     nkf_char c2, c1, c0;
3630     nkf_char ret = 0;
3631     val &= VALUE_MASK;
3632     if (val < 0x80){
3633         *p2 = 0;
3634         *p1 = val;
3635     }else{
3636         w16w_conv(val, &c2, &c1, &c0);
3637         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3638 #ifdef NUMCHAR_OPTION
3639         if (ret > 0){
3640             *p2 = 0;
3641             *p1 = CLASS_UNICODE | val;
3642             ret = 0;
3643         }
3644 #endif
3645     }
3646     return ret;
3647 }
3648 #endif
3649
3650 #ifdef UTF8_INPUT_ENABLE
3651 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3652 {
3653     nkf_char ret = 0;
3654     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3655         (*oconv)(c2, c1);
3656         return 0;
3657     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3658         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3659             return -2;
3660         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3661         c2 = 0;
3662     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3663         /*
3664            return 2;
3665         */
3666         return 1;
3667     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3668     if (ret) return ret;
3669     (*oconv)(c2, c1);
3670     return 0;
3671 }
3672
3673 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3674 {
3675     int ret = 0;
3676
3677     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3678     } else if (is_unicode_bmp(c1)) {
3679         ret = w16e_conv(c1, &c2, &c1);
3680     } else {
3681         c2 = 0;
3682         c1 =  CLASS_UNICODE | c1;
3683     }
3684     if (ret) return ret;
3685     (*oconv)(c2, c1);
3686     return 0;
3687 }
3688
3689 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3690 {
3691     const unsigned short *const *pp;
3692     const unsigned short *const *const *ppp;
3693     static const char no_best_fit_chars_table_C2[] =
3694     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3695         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3696         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3697         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3698     static const char no_best_fit_chars_table_C2_ms[] =
3699     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3700         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3701         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3702         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3703     static const char no_best_fit_chars_table_932_C2[] =
3704     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3705         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3706         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3707         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3708     static const char no_best_fit_chars_table_932_C3[] =
3709     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3710         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3711         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3712         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3713     nkf_char ret = 0;
3714
3715     if(c2 < 0x80){
3716         *p2 = 0;
3717         *p1 = c2;
3718     }else if(c2 < 0xe0){
3719         if(no_best_fit_chars_f){
3720             if(ms_ucs_map_f == UCS_MAP_CP932){
3721                 switch(c2){
3722                 case 0xC2:
3723                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3724                     break;
3725                 case 0xC3:
3726                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3727                     break;
3728                 }
3729             }else if(!cp932inv_f){
3730                 switch(c2){
3731                 case 0xC2:
3732                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3733                     break;
3734                 case 0xC3:
3735                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3736                     break;
3737                 }
3738             }else if(ms_ucs_map_f == UCS_MAP_MS){
3739                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3740             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3741                 switch(c2){
3742                 case 0xC2:
3743                     switch(c1){
3744                     case 0xA2:
3745                     case 0xA3:
3746                     case 0xA5:
3747                     case 0xA6:
3748                     case 0xAC:
3749                     case 0xAF:
3750                     case 0xB8:
3751                         return 1;
3752                     }
3753                     break;
3754                 }
3755             }
3756         }
3757         pp =
3758             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3759             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3760             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3761             utf8_to_euc_2bytes;
3762         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3763     }else if(c0 < 0xF0){
3764         if(no_best_fit_chars_f){
3765             if(ms_ucs_map_f == UCS_MAP_CP932){
3766                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3767             }else if(ms_ucs_map_f == UCS_MAP_MS){
3768                 switch(c2){
3769                 case 0xE2:
3770                     switch(c1){
3771                     case 0x80:
3772                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3773                         break;
3774                     case 0x88:
3775                         if(c0 == 0x92) return 1;
3776                         break;
3777                     }
3778                     break;
3779                 case 0xE3:
3780                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3781                     break;
3782                 }
3783             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3784                 switch(c2){
3785                 case 0xE3:
3786                     switch(c1){
3787                     case 0x82:
3788                             if(c0 == 0x94) return 1;
3789                         break;
3790                     case 0x83:
3791                             if(c0 == 0xBB) return 1;
3792                         break;
3793                     }
3794                     break;
3795                 }
3796             }else{
3797                 switch(c2){
3798                 case 0xE2:
3799                     switch(c1){
3800                     case 0x80:
3801                         if(c0 == 0x95) return 1;
3802                         break;
3803                     case 0x88:
3804                         if(c0 == 0xA5) return 1;
3805                         break;
3806                     }
3807                     break;
3808                 case 0xEF:
3809                     switch(c1){
3810                     case 0xBC:
3811                         if(c0 == 0x8D) return 1;
3812                         break;
3813                     case 0xBD:
3814                         if(c0 == 0x9E && !cp932inv_f) return 1;
3815                         break;
3816                     case 0xBF:
3817                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3818                         break;
3819                     }
3820                     break;
3821                 }
3822             }
3823         }
3824         ppp =
3825             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3826             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3827             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3828             utf8_to_euc_3bytes;
3829         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3830     }else return -1;
3831 #ifdef SHIFTJIS_CP932
3832     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3833         nkf_char s2, s1;
3834         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3835             s2e_conv(s2, s1, p2, p1);
3836         }else{
3837             ret = 1;
3838         }
3839     }
3840 #endif
3841     return ret;
3842 }
3843
3844 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3845 {
3846     nkf_char c2;
3847     const unsigned short *p;
3848     unsigned short val;
3849
3850     if (pp == 0) return 1;
3851
3852     c1 -= 0x80;
3853     if (c1 < 0 || psize <= c1) return 1;
3854     p = pp[c1];
3855     if (p == 0)  return 1;
3856
3857     c0 -= 0x80;
3858     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3859     val = p[c0];
3860     if (val == 0) return 1;
3861     if (no_cp932ext_f && (
3862         (val>>8) == 0x2D || /* NEC special characters */
3863         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3864         )) return 1;
3865
3866     c2 = val >> 8;
3867    if (val > 0x7FFF){
3868         c2 &= 0x7f;
3869         c2 |= PREFIX_EUCG3;
3870     }
3871     if (c2 == SO) c2 = JIS_X_0201;
3872     c1 = val & 0x7f;
3873     if (p2) *p2 = c2;
3874     if (p1) *p1 = c1;
3875     return 0;
3876 }
3877
3878 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3879 {
3880     int shift = 20;
3881     c &= VALUE_MASK;
3882     while(shift >= 0){
3883         if(c >= 1<<shift){
3884             while(shift >= 0){
3885                 (*f)(0, bin2hex(c>>shift));
3886                 shift -= 4;
3887             }
3888         }else{
3889             shift -= 4;
3890         }
3891     }
3892     return;
3893 }
3894
3895 void encode_fallback_html(nkf_char c)
3896 {
3897     (*oconv)(0, '&');
3898     (*oconv)(0, '#');
3899     c &= VALUE_MASK;
3900     if(c >= NKF_INT32_C(1000000))
3901         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3902     if(c >= NKF_INT32_C(100000))
3903         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3904     if(c >= 10000)
3905         (*oconv)(0, 0x30+(c/10000  )%10);
3906     if(c >= 1000)
3907         (*oconv)(0, 0x30+(c/1000   )%10);
3908     if(c >= 100)
3909         (*oconv)(0, 0x30+(c/100    )%10);
3910     if(c >= 10)
3911         (*oconv)(0, 0x30+(c/10     )%10);
3912     if(c >= 0)
3913         (*oconv)(0, 0x30+ c         %10);
3914     (*oconv)(0, ';');
3915     return;
3916 }
3917
3918 void encode_fallback_xml(nkf_char c)
3919 {
3920     (*oconv)(0, '&');
3921     (*oconv)(0, '#');
3922     (*oconv)(0, 'x');
3923     nkf_each_char_to_hex(oconv, c);
3924     (*oconv)(0, ';');
3925     return;
3926 }
3927
3928 void encode_fallback_java(nkf_char c)
3929 {
3930     (*oconv)(0, '\\');
3931     c &= VALUE_MASK;
3932     if(!is_unicode_bmp(c)){
3933         (*oconv)(0, 'U');
3934         (*oconv)(0, '0');
3935         (*oconv)(0, '0');
3936         (*oconv)(0, bin2hex(c>>20));
3937         (*oconv)(0, bin2hex(c>>16));
3938     }else{
3939         (*oconv)(0, 'u');
3940     }
3941     (*oconv)(0, bin2hex(c>>12));
3942     (*oconv)(0, bin2hex(c>> 8));
3943     (*oconv)(0, bin2hex(c>> 4));
3944     (*oconv)(0, bin2hex(c    ));
3945     return;
3946 }
3947
3948 void encode_fallback_perl(nkf_char c)
3949 {
3950     (*oconv)(0, '\\');
3951     (*oconv)(0, 'x');
3952     (*oconv)(0, '{');
3953     nkf_each_char_to_hex(oconv, c);
3954     (*oconv)(0, '}');
3955     return;
3956 }
3957
3958 void encode_fallback_subchar(nkf_char c)
3959 {
3960     c = unicode_subchar;
3961     (*oconv)((c>>8)&0xFF, c&0xFF);
3962     return;
3963 }
3964 #endif
3965
3966 #ifdef UTF8_OUTPUT_ENABLE
3967 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3968 {
3969     const unsigned short *p;
3970
3971     if (c2 == JIS_X_0201) {
3972         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3973             switch (c1) {
3974             case 0x20:
3975                 return 0xA0;
3976             case 0x7D:
3977                 return 0xA9;
3978             }
3979         }
3980         p = euc_to_utf8_1byte;
3981 #ifdef X0212_ENABLE
3982     } else if (is_eucg3(c2)){
3983         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3984             return 0xA6;
3985         }
3986         c2 = (c2&0x7f) - 0x21;
3987         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3988             p = x0212_to_utf8_2bytes[c2];
3989         else
3990             return 0;
3991 #endif
3992     } else {
3993         c2 &= 0x7f;
3994         c2 = (c2&0x7f) - 0x21;
3995         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3996             p =
3997                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3998                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3999                 euc_to_utf8_2bytes_ms[c2];
4000         else
4001             return 0;
4002     }
4003     if (!p) return 0;
4004     c1 = (c1 & 0x7f) - 0x21;
4005     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4006         return p[c1];
4007     return 0;
4008 }
4009
4010 void w_oconv(nkf_char c2, nkf_char c1)
4011 {
4012     nkf_char c0;
4013     nkf_char val;
4014
4015     if (output_bom_f) {
4016         output_bom_f = FALSE;
4017         (*o_putc)('\357');
4018         (*o_putc)('\273');
4019         (*o_putc)('\277');
4020     }
4021
4022     if (c2 == EOF) {
4023         (*o_putc)(EOF);
4024         return;
4025     }
4026
4027 #ifdef NUMCHAR_OPTION
4028     if (c2 == 0 && is_unicode_capsule(c1)){
4029         val = c1 & VALUE_MASK;
4030         if (val < 0x80){
4031             (*o_putc)(val);
4032         }else if (val < 0x800){
4033             (*o_putc)(0xC0 | (val >> 6));
4034             (*o_putc)(0x80 | (val & 0x3f));
4035         } else if (val <= NKF_INT32_C(0xFFFF)) {
4036             (*o_putc)(0xE0 | (val >> 12));
4037             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4038             (*o_putc)(0x80 | (val        & 0x3f));
4039         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4040             (*o_putc)(0xF0 | ( val>>18));
4041             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4042             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4043             (*o_putc)(0x80 | ( val      & 0x3f));
4044         }
4045         return;
4046     }
4047 #endif
4048
4049     if (c2 == 0) {
4050         output_mode = ASCII;
4051         (*o_putc)(c1);
4052     } else if (c2 == ISO_8859_1) {
4053         output_mode = UTF_8;
4054         (*o_putc)(c1 | 0x080);
4055     } else {
4056         output_mode = UTF_8;
4057         val = e2w_conv(c2, c1);
4058         if (val){
4059             w16w_conv(val, &c2, &c1, &c0);
4060             (*o_putc)(c2);
4061             if (c1){
4062                 (*o_putc)(c1);
4063                 if (c0) (*o_putc)(c0);
4064             }
4065         }
4066     }
4067 }
4068
4069 void w_oconv16(nkf_char c2, nkf_char c1)
4070 {
4071     if (output_bom_f) {
4072         output_bom_f = FALSE;
4073         if (output_endian == ENDIAN_LITTLE){
4074             (*o_putc)((unsigned char)'\377');
4075             (*o_putc)('\376');
4076         }else{
4077             (*o_putc)('\376');
4078             (*o_putc)((unsigned char)'\377');
4079         }
4080     }
4081
4082     if (c2 == EOF) {
4083         (*o_putc)(EOF);
4084         return;
4085     }
4086
4087     if (c2 == ISO_8859_1) {
4088         c2 = 0;
4089         c1 |= 0x80;
4090 #ifdef NUMCHAR_OPTION
4091     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4092         if (is_unicode_bmp(c1)) {
4093             c2 = (c1 >> 8) & 0xff;
4094             c1 &= 0xff;
4095         } else {
4096             c1 &= VALUE_MASK;
4097             if (c1 <= UNICODE_MAX) {
4098                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4099                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4100                 if (output_endian == ENDIAN_LITTLE){
4101                     (*o_putc)(c2 & 0xff);
4102                     (*o_putc)((c2 >> 8) & 0xff);
4103                     (*o_putc)(c1 & 0xff);
4104                     (*o_putc)((c1 >> 8) & 0xff);
4105                 }else{
4106                     (*o_putc)((c2 >> 8) & 0xff);
4107                     (*o_putc)(c2 & 0xff);
4108                     (*o_putc)((c1 >> 8) & 0xff);
4109                     (*o_putc)(c1 & 0xff);
4110                 }
4111             }
4112             return;
4113         }
4114 #endif
4115     } else if (c2) {
4116         nkf_char val = e2w_conv(c2, c1);
4117         c2 = (val >> 8) & 0xff;
4118         c1 = val & 0xff;
4119         if (!val) return;
4120     }
4121     if (output_endian == ENDIAN_LITTLE){
4122         (*o_putc)(c1);
4123         (*o_putc)(c2);
4124     }else{
4125         (*o_putc)(c2);
4126         (*o_putc)(c1);
4127     }
4128 }
4129
4130 void w_oconv32(nkf_char c2, nkf_char c1)
4131 {
4132     if (output_bom_f) {
4133         output_bom_f = FALSE;
4134         if (output_endian == ENDIAN_LITTLE){
4135             (*o_putc)((unsigned char)'\377');
4136             (*o_putc)('\376');
4137             (*o_putc)('\000');
4138             (*o_putc)('\000');
4139         }else{
4140             (*o_putc)('\000');
4141             (*o_putc)('\000');
4142             (*o_putc)('\376');
4143             (*o_putc)((unsigned char)'\377');
4144         }
4145     }
4146
4147     if (c2 == EOF) {
4148         (*o_putc)(EOF);
4149         return;
4150     }
4151
4152     if (c2 == ISO_8859_1) {
4153         c1 |= 0x80;
4154 #ifdef NUMCHAR_OPTION
4155     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4156         c1 &= VALUE_MASK;
4157 #endif
4158     } else if (c2) {
4159         c1 = e2w_conv(c2, c1);
4160         if (!c1) return;
4161     }
4162     if (output_endian == ENDIAN_LITTLE){
4163         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4164         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4165         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4166         (*o_putc)('\000');
4167     }else{
4168         (*o_putc)('\000');
4169         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4170         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4171         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4172     }
4173 }
4174 #endif
4175
4176 void e_oconv(nkf_char c2, nkf_char c1)
4177 {
4178 #ifdef NUMCHAR_OPTION
4179     if (c2 == 0 && is_unicode_capsule(c1)){
4180         w16e_conv(c1, &c2, &c1);
4181         if (c2 == 0 && is_unicode_capsule(c1)){
4182             c2 = c1 & VALUE_MASK;
4183             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4184                 /* eucJP-ms UDC */
4185                 c1 &= 0xFFF;
4186                 c2 = c1 / 94;
4187                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4188                 c1 = 0x21 + c1 % 94;
4189                 if (is_eucg3(c2)){
4190                     (*o_putc)(0x8f);
4191                     (*o_putc)((c2 & 0x7f) | 0x080);
4192                     (*o_putc)(c1 | 0x080);
4193                 }else{
4194                     (*o_putc)((c2 & 0x7f) | 0x080);
4195                     (*o_putc)(c1 | 0x080);
4196                 }
4197                 return;
4198             } else {
4199                 if (encode_fallback) (*encode_fallback)(c1);
4200                 return;
4201             }
4202         }
4203     }
4204 #endif
4205     if (c2 == EOF) {
4206         (*o_putc)(EOF);
4207         return;
4208     } else if (c2 == 0) {
4209         output_mode = ASCII;
4210         (*o_putc)(c1);
4211     } else if (c2 == JIS_X_0201) {
4212         output_mode = EUC_JP;
4213         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4214     } else if (c2 == ISO_8859_1) {
4215         output_mode = ISO_8859_1;
4216         (*o_putc)(c1 | 0x080);
4217 #ifdef X0212_ENABLE
4218     } else if (is_eucg3(c2)){
4219         output_mode = EUC_JP;
4220 #ifdef SHIFTJIS_CP932
4221         if (!cp932inv_f){
4222             nkf_char s2, s1;
4223             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4224                 s2e_conv(s2, s1, &c2, &c1);
4225             }
4226         }
4227 #endif
4228         if (c2 == 0) {
4229             output_mode = ASCII;