OSDN Git Service

* refactoring.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.jp\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.164 2008/01/21 23:05:37 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2008-01-21"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
/*
 * Newline emitters, selected at compile time from DEFAULT_NEWLINE:
 *   0x0D0A -> CR followed by LF (two calls to func)
 *   0x0D   -> CR only
 *   else   -> DEFAULT_NEWLINE is defined here as 0x0A and LF is emitted
 *             (an undefined DEFAULT_NEWLINE evaluates to 0 in #if and so
 *             lands in this branch).
 * PUT_NEWLINE(func) calls func(byte); OCONV_NEWLINE(func) calls
 * func(0, byte).
 * NOTE(review): if DEFAULT_NEWLINE was predefined with some other value the
 * #else branch redefines it -- confirm that config.h never does this.
 */
58 #if DEFAULT_NEWLINE == 0x0D0A
59 #define PUT_NEWLINE(func) do {\
60     func(0x0D);\
61     func(0x0A);\
62 } while (0)
63 #define OCONV_NEWLINE(func) do {\
64     func(0, 0x0D);\
65     func(0, 0x0A);\
66 } while (0)
67 #elif DEFAULT_NEWLINE == 0x0D
68 #define PUT_NEWLINE(func) func(0x0D)
69 #define OCONV_NEWLINE(func) func(0, 0x0D)
70 #else
71 #define DEFAULT_NEWLINE 0x0A
72 #define PUT_NEWLINE(func) func(0x0A)
73 #define OCONV_NEWLINE(func) func(0, 0x0A)
74 #endif
75 #ifdef HELP_OUTPUT_STDERR
76 #define HELP_OUTPUT stderr
77 #else
78 #define HELP_OUTPUT stdout
79 #endif
80
81 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
82 #define MSDOS
83 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
84 #define __WIN32__
85 #endif
86 #endif
87
88 #ifdef PERL_XS
89 #undef OVERWRITE
90 #endif
91
92 #ifndef PERL_XS
93 #include <stdio.h>
94 #endif
95
96 #include <stdlib.h>
97 #include <string.h>
98
99 #if defined(MSDOS) || defined(__OS2__)
100 #include <fcntl.h>
101 #include <io.h>
102 #if defined(_MSC_VER) || defined(__WATCOMC__)
103 #define mktemp _mktemp
104 #endif
105 #endif
106
107 #ifdef MSDOS
108 #ifdef LSI_C
109 #define setbinmode(fp) fsetbin(fp)
110 #elif defined(__DJGPP__)
111 #include <libc/dosio.h>
112 #define setbinmode(fp) djgpp_setbinmode(fp)
113 #else /* Microsoft C, Turbo C */
114 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
115 #endif
116 #else /* UNIX */
117 #define setbinmode(fp)
118 #endif
119
120 #if defined(__DJGPP__)
121 void  djgpp_setbinmode(FILE *fp)
122 {
123     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
124     int fd, m;
125     fd = fileno(fp);
126     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
127     __file_handle_set(fd, m);
128 }
129 #endif
130
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
133 #else /* BSD */
134 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
135 #endif
136
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
139 #define         EASYWIN
140 #ifndef __WIN16__
141 #define __WIN16__
142 #endif
143 #include <windows.h>
144 #endif
145
146 #ifdef OVERWRITE
147 /* added by satoru@isoternet.org */
148 #if defined(__EMX__)
149 #include <sys/types.h>
150 #endif
151 #include <sys/stat.h>
152 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
153 #include <unistd.h>
154 #if defined(__WATCOMC__)
155 #include <sys/utime.h>
156 #else
157 #include <utime.h>
158 #endif
159 #else /* defined(MSDOS) */
160 #ifdef __WIN32__
161 #ifdef __BORLANDC__ /* BCC32 */
162 #include <utime.h>
163 #else /* !defined(__BORLANDC__) */
164 #include <sys/utime.h>
165 #endif /* (__BORLANDC__) */
166 #else /* !defined(__WIN32__) */
167 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
168 #include <sys/utime.h>
169 #elif defined(__TURBOC__) /* BCC */
170 #include <utime.h>
171 #elif defined(LSI_C) /* LSI C */
172 #endif /* (__WIN32__) */
173 #endif
174 #endif
175 #endif
176
177 #define         FALSE   0
178 #define         TRUE    1
179
180 /* state of output_mode and input_mode
181
182    c2           0 means ASCII
183                 JIS_X_0201
184                 ISO_8859_1
185                 JIS_X_0208
186                 EOF      all termination
187    c1           32bit data
188
189  */
190
191 /* MIME ENCODE */
192
193 #define         FIXED_MIME      7
194 #define         STRICT_MIME     8
195
/*
 * Byte-order identifiers (input_endian and output_endian are initialised
 * from these).  Numbering starts at 1 and continues consecutively.
 */
enum byte_order {
    ENDIAN_BIG = 1,
    ENDIAN_LITTLE,      /* 2 */
    ENDIAN_2143,        /* 3 */
    ENDIAN_3412         /* 4 */
};
203
204 /* ASCII CODE */
205
206 #define         BS      0x08
207 #define         TAB     0x09
208 #define         LF      0x0a
209 #define         CR      0x0d
210 #define         ESC     0x1b
211 #define         SP      0x20
212 #define         AT      0x40
213 #define         SSP     0xa0
214 #define         DEL     0x7f
215 #define         SI      0x0f
216 #define         SO      0x0e
217 #define         SSO     0x8e
218 #define         SS3     0x8f
219 #define         CRLF    0x0D0A
220
221
222 /* encodings */
223
enum nkf_encodings {
    /* Consecutive ids, 0 .. NKF_ENCODING_TABLE_SIZE - 1. */
    ASCII = 0, ISO_8859_1,
    /* ISO-2022-JP and variants */
    ISO_2022_JP, CP50220, CP50221, CP50222, ISO_2022_JP_1, ISO_2022_JP_3,
    /* Shift_JIS and variants */
    SHIFT_JIS, WINDOWS_31J, CP10001,
    /* EUC-JP and variants */
    EUC_JP, CP51932, EUCJP_MS, EUCJP_ASCII,
    /* X0213 / 2004 flavours of Shift_JIS and EUC */
    SHIFT_JISX0213, SHIFT_JIS_2004, EUC_JISX0213, EUC_JIS_2004,
    /* UTF-8 */
    UTF_8, UTF_8N, UTF_8_BOM, UTF8_MAC,
    /* UTF-16 */
    UTF_16, UTF_16BE, UTF_16BE_BOM, UTF_16LE, UTF_16LE_BOM,
    /* UTF-32 */
    UTF_32, UTF_32BE, UTF_32BE_BOM, UTF_32LE, UTF_32LE_BOM,
    /* One past the last consecutive id. */
    NKF_ENCODING_TABLE_SIZE,

    /* Separately numbered ids, starting at 0x1000. */
    JIS_X_0201 = 0x1000,
    JIS_X_0208,         /* 0x1001 */
    JIS_X_0212,         /* 0x1002 */
    JIS_X_0213_1,       /* 0x1003 */
    JIS_X_0213_2,       /* 0x1004 */
    BINARY              /* 0x1005 */
};
266
267 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
268 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
269 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
270 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
271 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
272 void j_oconv(nkf_char c2, nkf_char c1);
273 void s_oconv(nkf_char c2, nkf_char c1);
274 void e_oconv(nkf_char c2, nkf_char c1);
275 void w_oconv(nkf_char c2, nkf_char c1);
276 void w_oconv16(nkf_char c2, nkf_char c1);
277 void w_oconv32(nkf_char c2, nkf_char c1);
278
/*
 * A named pair of conversion routines.  The instances defined right after
 * this type (NkfEncodingASCII, NkfEncodingShift_JIS, ...) bind a name to
 * one *_iconv and one *_oconv function.
 */
279 typedef struct {
280     const char *name; /* name string, e.g. "Shift_JIS" */
281     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0); /* presumably the input-side converter -- confirm */
282     void (*oconv)(nkf_char c2, nkf_char c1); /* presumably the output-side converter -- confirm */
283 } nkf_native_encoding;
284
285 nkf_native_encoding NkfEncodingASCII =          { "ASCII", e_iconv, e_oconv };
286 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
287 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
288 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
289 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
290 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
291 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
292
/*
 * One entry of nkf_encoding_table: an encoding id with its name and the
 * nkf_native_encoding it is built on.
 */
293 typedef struct {
294     const int id; /* enum nkf_encodings value; the table sentinel uses -1 */
295     const char *name; /* name string; NULL in the table sentinel */
296     const nkf_native_encoding *base_encoding; /* converter pair used for this encoding */
297 } nkf_encoding;
298
/*
 * List of encodings as {id, name, base converter pair}.
 *
 * The first NKF_ENCODING_TABLE_SIZE entries follow the order of
 * enum nkf_encodings, so for them the array index equals the id.
 * BINARY comes next, at index NKF_ENCODING_TABLE_SIZE, although its enum
 * value is 0x1005 -- it therefore cannot be found by indexing with its id.
 * The list ends with the {-1, NULL, NULL} sentinel.
 */
299 nkf_encoding nkf_encoding_table[] = {
300     {ASCII,             "US-ASCII",             &NkfEncodingASCII},
301     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
302     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
303     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
304     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
305     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
306     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
307     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
308     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
309     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
310     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
311     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
312     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
313     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
314     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
315     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
316     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
317     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
318     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
319     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
320     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
321     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
322     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
323     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
324     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
325     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
326     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
327     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
328     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
329     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
330     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
331     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
332     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
333     {BINARY,            "BINARY",               &NkfEncodingASCII},
334     {-1,                NULL,                   NULL}
335 };
336
/*
 * Maps encoding names and aliases to enum nkf_encodings ids.  Several
 * names may share one id (e.g. "CP932" and "MS932" both give WINDOWS_31J).
 * The list ends with the {NULL, -1} sentinel.
 *
 * Every key is written in upper case; presumably the lookup upper-cases
 * the requested name or compares case-insensitively -- confirm at the
 * caller.
 * NOTE(review): no key maps to ISO_8859_1, although nkf_encoding_table has
 * an "ISO-8859-1" entry.
 */
337 struct {
338     const char *name;
339     const int id;
340 } encoding_name_to_id_table[] = {
341     {"US-ASCII",                ASCII},
342     {"ASCII",                   ASCII},
343     {"ISO-2022-JP",             ISO_2022_JP},
344     {"ISO2022JP-CP932",         CP50220},
345     {"CP50220",                 CP50220},
346     {"CP50221",                 CP50221},
347     {"CP50222",                 CP50222},
348     {"ISO-2022-JP-1",           ISO_2022_JP_1},
349     {"ISO-2022-JP-3",           ISO_2022_JP_3},
350     {"SHIFT_JIS",               SHIFT_JIS},
351     {"SJIS",                    SHIFT_JIS},
352     {"WINDOWS-31J",             WINDOWS_31J},
353     {"CSWINDOWS31J",            WINDOWS_31J},
354     {"CP932",                   WINDOWS_31J},
355     {"MS932",                   WINDOWS_31J},
356     {"CP10001",                 CP10001},
357     {"EUCJP",                   EUC_JP},
358     {"EUC-JP",                  EUC_JP},
359     {"CP51932",                 CP51932},
360     {"EUC-JP-MS",               EUCJP_MS},
361     {"EUCJP-MS",                EUCJP_MS},
362     {"EUCJPMS",                 EUCJP_MS},
363     {"EUC-JP-ASCII",            EUCJP_ASCII},
364     {"EUCJP-ASCII",             EUCJP_ASCII},
365     {"SHIFT_JISX0213",          SHIFT_JISX0213},
366     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
367     {"EUC-JISX0213",            EUC_JISX0213},
368     {"EUC-JIS-2004",            EUC_JIS_2004},
369     {"UTF-8",                   UTF_8},
370     {"UTF-8N",                  UTF_8N},
371     {"UTF-8-BOM",               UTF_8_BOM},
372     {"UTF8-MAC",                UTF8_MAC},
373     {"UTF-8-MAC",               UTF8_MAC},
374     {"UTF-16",                  UTF_16},
375     {"UTF-16BE",                UTF_16BE},
376     {"UTF-16BE-BOM",            UTF_16BE_BOM},
377     {"UTF-16LE",                UTF_16LE},
378     {"UTF-16LE-BOM",            UTF_16LE_BOM},
379     {"UTF-32",                  UTF_32},
380     {"UTF-32BE",                UTF_32BE},
381     {"UTF-32BE-BOM",            UTF_32BE_BOM},
382     {"UTF-32LE",                UTF_32LE},
383     {"UTF-32LE-BOM",            UTF_32LE_BOM},
384     {"BINARY",                  BINARY},
385     {NULL,                      -1}
386 };
387
388 #if defined(DEFAULT_CODE_JIS)
389 #define     DEFAULT_ENCODING ISO_2022_JP
390 #elif defined(DEFAULT_CODE_SJIS)
391 #define     DEFAULT_ENCODING SHIFT_JIS
392 #elif defined(DEFAULT_CODE_EUC)
393 #define     DEFAULT_ENCODING EUC_JP
394 #elif defined(DEFAULT_CODE_UTF8)
395 #define     DEFAULT_ENCODING UTF_8
396 #endif
397
398
/*
 * Character-class helpers.
 *
 * These are function-like macros: the argument may be evaluated more than
 * once, so never pass an expression with side effects.  Every use of the
 * parameter is parenthesised so that a compound argument such as `a | b`
 * or `x ? y : z` is tested as a whole instead of being split up by
 * operator precedence.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; yields 0 for any other character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when bits 8-15 of c2 (after truncation to unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and every character strictly between SP and DEL except
 * '?', '=', '_', '(', ')', '.' and '"' (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END inclusive. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
427
428 #define         HOLD_SIZE       1024
429 #if defined(INT_IS_SHORT)
430 #define         IOBUF_SIZE      2048
431 #else
432 #define         IOBUF_SIZE      16384
433 #endif
434
435 #define         DEFAULT_J       'B'
436 #define         DEFAULT_R       'B'
437
438 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
439 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
440
441 #define         RANGE_NUM_MAX   18
442 #define         GETA1   0x22
443 #define         GETA2   0x2e
444
445
446 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
447 #define sizeof_euc_to_utf8_1byte 94
448 #define sizeof_euc_to_utf8_2bytes 94
449 #define sizeof_utf8_to_euc_C2 64
450 #define sizeof_utf8_to_euc_E5B8 64
451 #define sizeof_utf8_to_euc_2bytes 112
452 #define sizeof_utf8_to_euc_3bytes 16
453 #endif
454
455 /* MIME preprocessor */
456
457 #ifdef EASYWIN /*Easy Win */
458 extern POINT _BufferSize;
459 #endif
460
/*
 * State record for one candidate input encoding; the instances are the
 * entries of input_code_list.
 * NOTE(review): the exact meaning of stat, score, index, buf and
 * _file_stat is not visible in this part of the file -- see the status_*
 * and code_score helpers that take a struct input_code pointer.
 */
461 struct input_code{
462     char *name; /* encoding name, e.g. "EUC-JP" */
463     nkf_char stat;
464     nkf_char score;
465     nkf_char index;
466     nkf_char buf[3];
467     void (*status_func)(struct input_code *, nkf_char); /* per-character callback; NULL for some list entries */
468     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* conversion routine associated with this encoding */
469     int _file_stat;
470 };
471
472 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
473 static nkf_encoding *input_encoding = NULL;
474 static nkf_encoding *output_encoding = NULL;
475 static void set_output_encoding(nkf_encoding *enc);
476
477 #if !defined(PERL_XS) && !defined(WIN32DLL)
478 static  nkf_char     noconvert(FILE *f);
479 #endif
480 static  void    module_connection(void);
481 static  nkf_char     kanji_convert(FILE *f);
482 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
483 static  nkf_char     push_hold_buf(nkf_char c2);
484 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
485 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
486 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
487 /* UCS Mapping
488  * 0: Shift_JIS, eucJP-ascii
489  * 1: eucJP-ms
490  * 2: CP932, CP51932
491  * 3: CP10001
492  */
493 #define UCS_MAP_ASCII   0
494 #define UCS_MAP_MS      1
495 #define UCS_MAP_CP932   2
496 #define UCS_MAP_CP10001 3
497 static int ms_ucs_map_f = UCS_MAP_ASCII;
498 #endif
499 #ifdef UTF8_INPUT_ENABLE
500 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
501 static  int     no_cp932ext_f = FALSE;
502 /* ignore ZERO WIDTH NO-BREAK SPACE */
503 static  int     no_best_fit_chars_f = FALSE;
504 static  int     input_endian = ENDIAN_BIG;
505 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
506 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
507 static  void    encode_fallback_html(nkf_char c);
508 static  void    encode_fallback_xml(nkf_char c);
509 static  void    encode_fallback_java(nkf_char c);
510 static  void    encode_fallback_perl(nkf_char c);
511 static  void    encode_fallback_subchar(nkf_char c);
512 static  void    (*encode_fallback)(nkf_char c) = NULL;
513 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
514 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
515 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
516 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
517 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
518 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
519 static  void    w_status(struct input_code *, nkf_char);
520 #endif
521 #ifdef UTF8_OUTPUT_ENABLE
522 static  int     output_bom_f = FALSE;
523 static  int     output_endian = ENDIAN_BIG;
524 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
525 #endif
526 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
527 static  void    fold_conv(nkf_char c2,nkf_char c1);
528 static  void    nl_conv(nkf_char c2,nkf_char c1);
529 static  void    z_conv(nkf_char c2,nkf_char c1);
530 static  void    rot_conv(nkf_char c2,nkf_char c1);
531 static  void    hira_conv(nkf_char c2,nkf_char c1);
532 static  void    base64_conv(nkf_char c2,nkf_char c1);
533 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
534 static  void    no_connection(nkf_char c2,nkf_char c1);
535 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
536
537 static  void    code_score(struct input_code *ptr);
538 static  void    code_status(nkf_char c);
539
540 static  void    std_putc(nkf_char c);
541 static  nkf_char     std_getc(FILE *f);
542 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
543
544 static  nkf_char     broken_getc(FILE *f);
545 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
546
547 static  nkf_char     mime_begin(FILE *f);
548 static  nkf_char     mime_getc(FILE *f);
549 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
550
551 static  void    switch_mime_getc(void);
552 static  void    unswitch_mime_getc(void);
553 static  nkf_char     mime_begin_strict(FILE *f);
554 static  nkf_char     mime_getc_buf(FILE *f);
555 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
556 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
557
558 static  nkf_char     base64decode(nkf_char c);
559 static  void    mime_prechar(nkf_char c2, nkf_char c1);
560 static  void    mime_putc(nkf_char c);
561 static  void    open_mime(nkf_char c);
562 static  void    close_mime(void);
563 static  void    eof_mime(void);
564 static  void    mimeout_addchar(nkf_char c);
565 #ifndef PERL_XS
566 static  void    usage(void);
567 static  void    version(void);
568 static  void    show_configuration(void);
569 #endif
570 static  void    options(unsigned char *c);
571 static  void    reinit(void);
572
573 /* buffers */
574
575 #if !defined(PERL_XS) && !defined(WIN32DLL)
576 static unsigned char   stdibuf[IOBUF_SIZE];
577 static unsigned char   stdobuf[IOBUF_SIZE];
578 #endif
579 static unsigned char   hold_buf[HOLD_SIZE*2];
580 static int             hold_count = 0;
581
582 /* MIME preprocessor fifo */
583
584 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
585 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
586 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
587 static unsigned char           mime_buf[MIME_BUF_SIZE];
588 static unsigned int            mime_top = 0;
589 static unsigned int            mime_last = 0;  /* decoded */
590 static unsigned int            mime_input = 0; /* undecoded */
591 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
592
593 /* flags */
594 static int             unbuf_f = FALSE;
595 static int             estab_f = FALSE;
596 static int             nop_f = FALSE;
597 static int             binmode_f = TRUE;       /* binary mode */
598 static int             rot_f = FALSE;          /* rot13/47 mode */
599 static int             hira_f = FALSE;          /* hira/kata henkan */
600 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
601 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
602 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
603 static int             mimebuf_f = FALSE;      /* MIME buffered input */
604 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
605 static int             iso8859_f = FALSE;      /* ISO8859 through */
606 static int             mimeout_f = FALSE;       /* base64 mode */
607 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
608 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
609
610 #ifdef UNICODE_NORMALIZATION
611 static int nfc_f = FALSE;
612 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
613 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
614 static nkf_char nfc_getc(FILE *f);
615 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
616 #endif
617
618 #ifdef INPUT_OPTION
619 static int cap_f = FALSE;
620 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
621 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
622 static nkf_char cap_getc(FILE *f);
623 static nkf_char cap_ungetc(nkf_char c,FILE *f);
624
625 static int url_f = FALSE;
626 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
627 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
628 static nkf_char url_getc(FILE *f);
629 static nkf_char url_ungetc(nkf_char c,FILE *f);
630 #endif
631
/*
 * 32-bit constants and the "Unicode capsule" tests.
 *
 * NKF_INT32_C(n) appends an L suffix when INT_IS_SHORT is defined so the
 * constant does not have to fit in a short int; otherwise it is just (n).
 * A value is a Unicode capsule when its bits under CLASS_MASK equal
 * CLASS_UNICODE; the bits under VALUE_MASK are then compared with 0xFFFF
 * by is_unicode_bmp().
 *
 * The macro parameter is parenthesised so that compound arguments such as
 * `a | b` or `x ? y : z` are masked as a whole (`&` binds tighter than `|`
 * and `?:`).  The argument is still evaluated textually, so avoid side
 * effects.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
644
645 #ifdef NUMCHAR_OPTION
646 static int numchar_f = FALSE;
647 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc */
648 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
649 static nkf_char numchar_getc(FILE *f);
650 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
651 #endif
652
653 #ifdef CHECK_OPTION
654 static int noout_f = FALSE;
655 static void no_putc(nkf_char c);
656 static int debug_f = FALSE;
657 static void debug(const char *str);
658 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
659 #endif
660
661 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
662 #if !defined PERL_XS
663 static  void    print_guessed_code(char *filename);
664 #endif
665 static  void    set_input_codename(char *codename);
666
667 #ifdef EXEC_IO
668 static int exec_f = 0;
669 #endif
670
671 #ifdef SHIFTJIS_CP932
672 /* invert IBM extended characters to others */
673 static int cp51932_f = FALSE;
674
675 /* invert NEC-selected IBM extended characters to IBM extended characters */
676 static int cp932inv_f = TRUE;
677
678 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
679 #endif /* SHIFTJIS_CP932 */
680
681 #ifdef X0212_ENABLE
682 static int x0212_f = FALSE;
683 static nkf_char x0212_shift(nkf_char c);
684 static nkf_char x0212_unshift(nkf_char c);
685 #endif
686 static int x0213_f = FALSE;
687
688 static unsigned char prefix_table[256];
689
690 static void set_code_score(struct input_code *ptr, nkf_char score);
691 static void clr_code_score(struct input_code *ptr, nkf_char score);
692 static void status_disable(struct input_code *ptr);
693 static void status_push_ch(struct input_code *ptr, nkf_char c);
694 static void status_clear(struct input_code *ptr);
695 static void status_reset(struct input_code *ptr);
696 static void status_reinit(struct input_code *ptr);
697 static void status_check(struct input_code *ptr, nkf_char c);
698 static void e_status(struct input_code *, nkf_char);
699 static void s_status(struct input_code *, nkf_char);
700
/*
 * Candidate input encodings, one struct input_code each, with all numeric
 * state zeroed.  EUC-JP and Shift_JIS are always present; UTF-8, UTF-16 and
 * UTF-32 are added only when UTF8_INPUT_ENABLE is defined.  The UTF-16 and
 * UTF-32 entries have no status_func (NULL).  The list is terminated by an
 * all-zero entry (name == NULL).
 */
701 struct input_code input_code_list[] = {
702     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
703     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
704 #ifdef UTF8_INPUT_ENABLE
705     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
706     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
707     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
708 #endif
709     {0}
710 };
711
712 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
713 static int              base64_count = 0;
714
715 /* X0208 -> ASCII converter */
716
717 /* fold parameter */
718 static int             f_line = 0;    /* chars in line */
719 static int             f_prev = 0;
720 static int             fold_preserve_f = FALSE; /* preserve new lines */
721 static int             fold_f  = FALSE;
722 static int             fold_len  = 0;
723
724 /* options */
725 static unsigned char   kanji_intro = DEFAULT_J;
726 static unsigned char   ascii_intro = DEFAULT_R;
727
728 /* Folding */
729
730 #define FOLD_MARGIN  10
731 #define DEFAULT_FOLD 60
732
733 static int             fold_margin  = FOLD_MARGIN;
734
735 /* converters */
736
737 #ifdef DEFAULT_CODE_JIS
738 #   define  DEFAULT_CONV j_oconv
739 #endif
740 #ifdef DEFAULT_CODE_SJIS
741 #   define  DEFAULT_CONV s_oconv
742 #endif
743 #ifdef DEFAULT_CODE_EUC
744 #   define  DEFAULT_CONV e_oconv
745 #endif
746 #ifdef DEFAULT_CODE_UTF8
747 #   define  DEFAULT_CONV w_oconv
748 #endif
749
750 /* process default */
751 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
752 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
753
754 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
755 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
756 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
757 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
758 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
759 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
760 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
761
762 /* static redirections */
763
764 static  void   (*o_putc)(nkf_char c) = std_putc;
765
766 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
767 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
768
769 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc */
770 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
771
772 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
773
774 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
775 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
776
777 /* for strict mime */
778 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
779 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
780
781 /* Global states */
782 static int output_mode = ASCII,    /* output kanji mode */
783            input_mode =  ASCII,    /* input kanji mode */
784            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
785 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
786
787 /* X0201 / X0208 conversion tables */
788
789 /* X0201 kana conversion table */
790 /* 90-9F A0-DF */
791 static const unsigned char cv[]= {
792     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
793     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
794     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
795     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
796     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
797     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
798     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
799     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
800     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
801     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
802     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
803     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
804     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
805     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
806     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
807     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
808     0x00,0x00};
809
810
811 /* X0201 kana conversion table for dakuten (voiced sound mark) */
812 /* 90-9F A0-DF */
813 static const unsigned char dv[]= {
814     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
819     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
820     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
821     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
822     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
823     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
824     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
825     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830     0x00,0x00};
831
832 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
833 /* 90-9F A0-DF */
/*
 * Same two-byte-pair layout and length as cv[] and dv[]; only five pairs
 * are non-zero.  NOTE(review): presumably a zero pair means "no semi-voiced
 * form" -- confirm against the lookup site.
 */
834 static const unsigned char ev[]= {
835     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
846     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
847     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851     0x00,0x00};
852
853
854 /* X0208 kigou (symbol) conversion table */
855 /* 0x8140 - 0x819e */
/*
 * One byte per entry, 96 entries.  Non-zero entries have the numeric values
 * of ASCII punctuation (for example 0x2c ',', 0x2e '.', 0x3a ':').
 * NOTE(review): presumably 0x00 means "no single-byte equivalent"; the
 * lookup code is outside this part of the file -- confirm there.
 */
856 static const unsigned char fv[] = {
857
858     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
859     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
860     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
861     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
862     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
863     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
864     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
865     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
866     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
867     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
868     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
869     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
870 } ;
871
872
873
/* When TRUE, main() redirects stdout into a file and closes it at exit. */
874 static int             file_out_f = FALSE;
875 #ifdef OVERWRITE
/* State for the --overwrite / --in-place long options (set in options()). */
876 static int             overwrite_f = FALSE;
/* TRUE: main() copies the original file's timestamps onto the new file. */
877 static int             preserve_time_f = FALSE;
/* TRUE: main() renames the original to a backup name before replacing it. */
878 static int             backup_f = FALSE;
/* Suffix/pattern handed to get_backup_filename(); replaced by a malloc'ed
 * copy of the option value when --overwrite=... or --in-place=... is given. */
879 static char            *backup_suffix = "";
880 static char *get_backup_filename(const char *suffix, const char *filename);
881 #endif
882
883 static int nlmode_f = 0;   /* CR, LF, CRLF */
884 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
885 static nkf_char prev_cr = 0; /* CR or 0 */
886 #ifdef EASYWIN /*Easy Win */
/* Target of the scanf() that main() performs before returning. */
887 static int             end_check;
888 #endif /*Easy Win */
889
/* NOTE(review): std_gc_buf / std_gc_ndx are not used in this part of the
 * file; presumably the pushback storage behind std_getc/std_ungetc. */
890 #define STD_GC_BUFSIZE (256)
891 nkf_char std_gc_buf[STD_GC_BUFSIZE];
892 nkf_char std_gc_ndx;
893
/*
 * Duplicate a NUL-terminated string into freshly malloc'ed storage.
 *
 * str: string to copy (must not be NULL).
 * Returns the copy.  If the allocation fails, the error is reported with
 * perror(str) and the string literal "" is returned instead; that literal
 * is not heap memory.
 */
char* nkf_strcpy(const char *str)
{
    size_t nbytes = strlen(str) + 1;   /* characters plus terminator */
    char *copy = malloc(nbytes);

    if (copy == NULL) {
        perror(str);
        return "";
    }
    memcpy(copy, str, nbytes);
    return copy;
}
904
/*
 * Copy src into dest, converting each character with nkf_toupper().
 *
 * src:    NUL-terminated input string.
 * dest:   output buffer.
 * length: capacity of dest in bytes, INCLUDING room for the terminator.
 *
 * At most length - 1 characters are converted and dest is always
 * NUL-terminated when length > 0.  The previous version stored the
 * terminator at dest[length], one byte past a buffer of `length` bytes
 * (options() calls this with a 32-byte array and length 32).
 * With length == 0 nothing is written.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    size_t i = 0;

    if (length == 0) {
        return;
    }
    /* i + 1 < length keeps the last slot free for the terminator */
    for (; i + 1 < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
913
914 static nkf_encoding *nkf_enc_from_index(int idx)
915 {
916     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
917         return 0;
918     }
919     return &nkf_encoding_table[idx];
920 }
921
922 static int nkf_enc_find_index(const char *name)
923 {
924     int i, index = -1;
925     if (*name == 'X' && *(name+1) == '-') name += 2;
926     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
927         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
928             return encoding_name_to_id_table[i].id;
929         }
930     }
931     return index;
932 }
933
934 static nkf_encoding *nkf_enc_find(const char *name)
935 {
936     int idx = -1;
937     idx = nkf_enc_find_index(name);
938     if (idx < 0) return 0;
939     return nkf_enc_from_index(idx);
940 }
941
/*
 * Accessors and predicates for an nkf_encoding pointer `enc`.
 * All of them expand `enc` textually (some more than once), so pass an
 * expression without side effects.
 */
/* Field accessors: name, numeric id, and the base encoding record. */
942 #define nkf_enc_name(enc) (enc)->name
943 #define nkf_enc_to_index(enc) (enc)->id
944 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* The iconv / oconv members are taken from the base encoding record. */
945 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
946 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
947 #define nkf_enc_asciicompat(enc) (\
948     nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
949     nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 records. */
950 #define nkf_enc_unicode_p(enc) (\
951     nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
952     nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
953     nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
954 #define nkf_enc_cp5022x_p(enc) (\
955     nkf_enc_to_index(enc) == CP50220 ||\
956     nkf_enc_to_index(enc) == CP50221 ||\
957     nkf_enc_to_index(enc) == CP50222)
958
959 #ifdef WIN32DLL
960 #include "nkf32dll.c"
961 #elif defined(PERL_XS)
962 #else /* WIN32DLL */
963 int main(int argc, char **argv)
964 {
965     FILE  *fin;
966     unsigned char  *cp;
967
968     char *outfname = NULL;
969     char *origfname;
970
971 #ifdef EASYWIN /*Easy Win */
972     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
973 #endif
974
975     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
976         cp = (unsigned char *)*argv;
977         options(cp);
978 #ifdef EXEC_IO
979         if (exec_f){
980             int fds[2], pid;
981             if (pipe(fds) < 0 || (pid = fork()) < 0){
982                 abort();
983             }
984             if (pid == 0){
985                 if (exec_f > 0){
986                     close(fds[0]);
987                     dup2(fds[1], 1);
988                 }else{
989                     close(fds[1]);
990                     dup2(fds[0], 0);
991                 }
992                 execvp(argv[1], &argv[1]);
993             }
994             if (exec_f > 0){
995                 close(fds[1]);
996                 dup2(fds[0], 0);
997             }else{
998                 close(fds[0]);
999                 dup2(fds[1], 1);
1000             }
1001             argc = 0;
1002             break;
1003         }
1004 #endif
1005     }
1006
1007     if (guess_f) {
1008 #ifdef CHECK_OPTION
1009         int debug_f_back = debug_f;
1010 #endif
1011 #ifdef EXEC_IO
1012         int exec_f_back = exec_f;
1013 #endif
1014 #ifdef X0212_ENABLE
1015         int x0212_f_back = x0212_f;
1016 #endif
1017         int x0213_f_back = x0213_f;
1018         int guess_f_back = guess_f;
1019         reinit();
1020         guess_f = guess_f_back;
1021         mime_f = FALSE;
1022 #ifdef CHECK_OPTION
1023         debug_f = debug_f_back;
1024 #endif
1025 #ifdef EXEC_IO
1026         exec_f = exec_f_back;
1027 #endif
1028 #ifdef X0212_ENABLE
1029         x0212_f = x0212_f_back;
1030 #endif
1031         x0213_f = x0213_f_back;
1032     }
1033
1034     if (binmode_f == TRUE)
1035 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1036     if (freopen("","wb",stdout) == NULL)
1037         return (-1);
1038 #else
1039     setbinmode(stdout);
1040 #endif
1041
1042     if (unbuf_f)
1043       setbuf(stdout, (char *) NULL);
1044     else
1045       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1046
1047     if (argc == 0) {
1048       if (binmode_f == TRUE)
1049 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1050       if (freopen("","rb",stdin) == NULL) return (-1);
1051 #else
1052       setbinmode(stdin);
1053 #endif
1054       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1055       if (nop_f)
1056           noconvert(stdin);
1057       else {
1058           kanji_convert(stdin);
1059           if (guess_f) print_guessed_code(NULL);
1060       }
1061     } else {
1062       int nfiles = argc;
1063         int is_argument_error = FALSE;
1064       while (argc--) {
1065             input_codename = NULL;
1066             input_newline = 0;
1067 #ifdef CHECK_OPTION
1068             iconv_for_check = 0;
1069 #endif
1070           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1071                 perror(*(argv-1));
1072                 is_argument_error = TRUE;
1073                 continue;
1074           } else {
1075 #ifdef OVERWRITE
1076               int fd = 0;
1077               int fd_backup = 0;
1078 #endif
1079
1080 /* reopen file for stdout */
1081               if (file_out_f == TRUE) {
1082 #ifdef OVERWRITE
1083                   if (overwrite_f){
1084                       outfname = malloc(strlen(origfname)
1085                                         + strlen(".nkftmpXXXXXX")
1086                                         + 1);
1087                       if (!outfname){
1088                           perror(origfname);
1089                           return -1;
1090                       }
1091                       strcpy(outfname, origfname);
1092 #ifdef MSDOS
1093                       {
1094                           int i;
1095                           for (i = strlen(outfname); i; --i){
1096                               if (outfname[i - 1] == '/'
1097                                   || outfname[i - 1] == '\\'){
1098                                   break;
1099                               }
1100                           }
1101                           outfname[i] = '\0';
1102                       }
1103                       strcat(outfname, "ntXXXXXX");
1104                       mktemp(outfname);
1105                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1106                                 S_IREAD | S_IWRITE);
1107 #else
1108                       strcat(outfname, ".nkftmpXXXXXX");
1109                       fd = mkstemp(outfname);
1110 #endif
1111                       if (fd < 0
1112                           || (fd_backup = dup(fileno(stdout))) < 0
1113                           || dup2(fd, fileno(stdout)) < 0
1114                           ){
1115                           perror(origfname);
1116                           return -1;
1117                       }
1118                   }else
1119 #endif
1120                   if(argc == 1) {
1121                       outfname = *argv++;
1122                       argc--;
1123                   } else {
1124                       outfname = "nkf.out";
1125                   }
1126
1127                   if(freopen(outfname, "w", stdout) == NULL) {
1128                       perror (outfname);
1129                       return (-1);
1130                   }
1131                   if (binmode_f == TRUE) {
1132 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1133                       if (freopen("","wb",stdout) == NULL)
1134                            return (-1);
1135 #else
1136                       setbinmode(stdout);
1137 #endif
1138                   }
1139               }
1140               if (binmode_f == TRUE)
1141 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1142                  if (freopen("","rb",fin) == NULL)
1143                     return (-1);
1144 #else
1145                  setbinmode(fin);
1146 #endif
1147               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1148               if (nop_f)
1149                   noconvert(fin);
1150               else {
1151                   char *filename = NULL;
1152                   kanji_convert(fin);
1153                   if (nfiles > 1) filename = origfname;
1154                   if (guess_f) print_guessed_code(filename);
1155               }
1156               fclose(fin);
1157 #ifdef OVERWRITE
1158               if (overwrite_f) {
1159                   struct stat     sb;
1160 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1161                   time_t tb[2];
1162 #else
1163                   struct utimbuf  tb;
1164 #endif
1165
1166                   fflush(stdout);
1167                   close(fd);
1168                   if (dup2(fd_backup, fileno(stdout)) < 0){
1169                       perror("dup2");
1170                   }
1171                   if (stat(origfname, &sb)) {
1172                       fprintf(stderr, "Can't stat %s\n", origfname);
1173                   }
1174                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1175                   if (chmod(outfname, sb.st_mode)) {
1176                       fprintf(stderr, "Can't set permission %s\n", outfname);
1177                   }
1178
1179                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1180                     if(preserve_time_f){
1181 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1182                         tb[0] = tb[1] = sb.st_mtime;
1183                         if (utime(outfname, tb)) {
1184                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1185                         }
1186 #else
1187                         tb.actime  = sb.st_atime;
1188                         tb.modtime = sb.st_mtime;
1189                         if (utime(outfname, &tb)) {
1190                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1191                         }
1192 #endif
1193                     }
1194                     if(backup_f){
1195                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1196 #ifdef MSDOS
1197                         unlink(backup_filename);
1198 #endif
1199                         if (rename(origfname, backup_filename)) {
1200                             perror(backup_filename);
1201                             fprintf(stderr, "Can't rename %s to %s\n",
1202                                     origfname, backup_filename);
1203                         }
1204                     }else{
1205 #ifdef MSDOS
1206                         if (unlink(origfname)){
1207                             perror(origfname);
1208                         }
1209 #endif
1210                     }
1211                   if (rename(outfname, origfname)) {
1212                       perror(origfname);
1213                       fprintf(stderr, "Can't rename %s to %s\n",
1214                               outfname, origfname);
1215                   }
1216                   free(outfname);
1217               }
1218 #endif
1219           }
1220       }
1221         if (is_argument_error)
1222             return(-1);
1223     }
1224 #ifdef EASYWIN /*Easy Win */
1225     if (file_out_f == FALSE)
1226         scanf("%d",&end_check);
1227     else
1228         fclose(stdout);
1229 #else /* for Other OS */
1230     if (file_out_f == TRUE)
1231         fclose(stdout);
1232 #endif /*Easy Win */
1233     return (0);
1234 }
1235 #endif /* WIN32DLL */
1236
1237 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` according to `suffix`.
 *
 * suffix:   backup pattern.  When it contains one or more '*', every '*'
 *           is replaced by `filename` and all other characters are copied
 *           unchanged (e.g. "bak/" followed by '*' and "~").  Without any
 *           '*', the result is `filename` followed by `suffix`.
 * filename: name of the file being backed up.
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free,
 * or NULL (after reporting with perror) when the allocation fails.
 *
 * The previous version allocated a single byte in the no-'*' case and then
 * copied the whole name into it, and did not check that allocation.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t result_size;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* each '*' disappears and one copy of filename takes its place */
        result_size = (suffix_length - asterisk_count)
                      + asterisk_count * filename_length + 1;
    } else {
        result_size = filename_length + suffix_length + 1;
    }

    backup_filename = malloc(result_size);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        memcpy(backup_filename, filename, filename_length);
        /* + 1 copies the suffix's terminator as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1276 #endif
1277
/*
 * Table of "--long" options consulted by options().
 *   name  - option text after the leading "--".  options() compares it up
 *           to a '=' in the name, so entries ending in '=' take a value.
 *   alias - when non-empty, a string of short option letters that
 *           options() processes in place of the long option; when empty,
 *           options() handles the entry itself by comparing `name`.
 * Lookup is a linear scan that stops at the first match.
 */
1278 static const struct {
1279     const char *name;
1280     const char *alias;
1281 } long_option[] = {
1282     {"ic=", ""},
1283     {"oc=", ""},
1284     {"base64","jMB"},
1285     {"euc","e"},
1286     {"euc-input","E"},
1287     {"fj","jm"},
1288     {"help","v"},
1289     {"jis","j"},
1290     {"jis-input","J"},
1291     {"mac","sLm"},
1292     {"mime","jM"},
1293     {"mime-input","m"},
1294     {"msdos","sLw"},
1295     {"sjis","s"},
1296     {"sjis-input","S"},
1297     {"unix","eLu"},
1298     {"version","V"},
1299     {"windows","sLw"},
1300     {"hiragana","h1"},
1301     {"katakana","h2"},
1302     {"katakana-hiragana","h3"},
1303     {"guess=", ""},
1304     {"guess", "g2"},
1305     {"cp932", ""},
1306     {"no-cp932", ""},
1307 #ifdef X0212_ENABLE
1308     {"x0212", ""},
1309 #endif
1310 #ifdef UTF8_OUTPUT_ENABLE
1311     {"utf8", "w"},
1312     {"utf16", "w16"},
1313     {"ms-ucs-map", ""},
1314     {"fb-skip", ""},
1315     {"fb-html", ""},
1316     {"fb-xml", ""},
1317     {"fb-perl", ""},
1318     {"fb-java", ""},
1319     {"fb-subchar", ""},
1320     {"fb-subchar=", ""},
1321 #endif
1322 #ifdef UTF8_INPUT_ENABLE
1323     {"utf8-input", "W"},
1324     {"utf16-input", "W16"},
1325     {"no-cp932ext", ""},
1326     {"no-best-fit-chars",""},
1327 #endif
1328 #ifdef UNICODE_NORMALIZATION
1329     {"utf8mac-input", ""},
1330 #endif
1331 #ifdef OVERWRITE
1332     {"overwrite", ""},
1333     {"overwrite=", ""},
1334     {"in-place", ""},
1335     {"in-place=", ""},
1336 #endif
1337 #ifdef INPUT_OPTION
1338     {"cap-input", ""},
1339     {"url-input", ""},
1340 #endif
1341 #ifdef NUMCHAR_OPTION
1342     {"numchar-input", ""},
1343 #endif
1344 #ifdef CHECK_OPTION
1345     {"no-output", ""},
1346     {"debug", ""},
1347 #endif
1348 #ifdef SHIFTJIS_CP932
1349     {"cp932inv", ""},
1350 #endif
1351 #ifdef EXEC_IO
1352     {"exec-in", ""},
1353     {"exec-out", ""},
1354 #endif
1355     {"prefix=", ""},
1356 };
1357
/*
 * Adjust the global conversion flags for a chosen input encoding.
 *
 * enc: encoding descriptor; the switch dispatches on its numeric id.
 *
 * Depending on the id this sets cp51932_f, ms_ucs_map_f, x0212_f, x0213_f,
 * nfc_f and input_endian (each only where the matching feature macro is
 * defined).  There is no default case: an id not listed below leaves every
 * flag as it was.  The function does not store enc itself; the caller in
 * options() assigns input_encoding separately.
 */
1358 static void set_input_encoding(nkf_encoding *enc)
1359 {
1360     switch (nkf_enc_to_index(enc)) {
1361     case CP50220:
1362     case CP50221:
1363     case CP50222:
1364 #ifdef SHIFTJIS_CP932
1365         cp51932_f = TRUE;
1366 #endif
1367 #ifdef UTF8_OUTPUT_ENABLE
1368         ms_ucs_map_f = UCS_MAP_CP932;
1369 #endif
1370         break;
1371     case ISO_2022_JP_1:
1372 #ifdef X0212_ENABLE
1373         x0212_f = TRUE;
1374 #endif
1375         break;
1376     case ISO_2022_JP_3:
1377 #ifdef X0212_ENABLE
1378         x0212_f = TRUE;
1379 #endif
1380         x0213_f = TRUE;
1381         break;
1382     case WINDOWS_31J:
1383 #ifdef SHIFTJIS_CP932
1384         cp51932_f = TRUE;
1385 #endif
1386 #ifdef UTF8_OUTPUT_ENABLE
1387         ms_ucs_map_f = UCS_MAP_CP932;
1388 #endif
1389         break;
1390     case CP10001:
1391 #ifdef SHIFTJIS_CP932
1392         cp51932_f = TRUE;
1393 #endif
1394 #ifdef UTF8_OUTPUT_ENABLE
1395         ms_ucs_map_f = UCS_MAP_CP10001;
1396 #endif
1397         break;
1398     case CP51932:
1399 #ifdef SHIFTJIS_CP932
1400         cp51932_f = TRUE;
1401 #endif
1402 #ifdef UTF8_OUTPUT_ENABLE
1403         ms_ucs_map_f = UCS_MAP_CP932;
1404 #endif
1405         break;
1406     case EUCJP_MS:
1407 #ifdef SHIFTJIS_CP932
1408         cp51932_f = FALSE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1411         ms_ucs_map_f = UCS_MAP_MS;
1412 #endif
1413         break;
1414     case EUCJP_ASCII:
1415 #ifdef SHIFTJIS_CP932
1416         cp51932_f = FALSE;
1417 #endif
1418 #ifdef UTF8_OUTPUT_ENABLE
1419         ms_ucs_map_f = UCS_MAP_ASCII;
1420 #endif
1421         break;
1422     case SHIFT_JISX0213:
1423     case SHIFT_JIS_2004:
1424         x0213_f = TRUE;
1425 #ifdef SHIFTJIS_CP932
1426         cp51932_f = FALSE;
1427 #endif
1428         break;
1429     case EUC_JISX0213:
1430     case EUC_JIS_2004:
1431         x0213_f = TRUE;
1432 #ifdef SHIFTJIS_CP932
1433         cp51932_f = FALSE;
1434 #endif
1435         break;
1436 #ifdef UTF8_INPUT_ENABLE
1437 #ifdef UNICODE_NORMALIZATION
1438     case UTF8_MAC:
1439         nfc_f = TRUE;
1440         break;
1441 #endif
/* UTF-16/32 ids without an explicit LE marker select big-endian input. */
1442     case UTF_16:
1443     case UTF_16BE:
1444     case UTF_16BE_BOM:
1445         input_endian = ENDIAN_BIG;
1446         break;
1447     case UTF_16LE:
1448     case UTF_16LE_BOM:
1449         input_endian = ENDIAN_LITTLE;
1450         break;
1451     case UTF_32:
1452     case UTF_32BE:
1453     case UTF_32BE_BOM:
1454         input_endian = ENDIAN_BIG;
1455         break;
1456     case UTF_32LE:
1457     case UTF_32LE_BOM:
1458         input_endian = ENDIAN_LITTLE;
1459         break;
1460 #endif
1461     }
1462 }
1463
/*
 * Adjust the global conversion flags for a chosen output encoding.
 *
 * enc: encoding descriptor; the switch dispatches on its numeric id.
 *
 * x0201_f is always reset to FALSE first (only CP50220 turns it back on).
 * Depending on the id this then sets cp932inv_f, ms_ucs_map_f, x0212_f,
 * x0213_f, output_bom_f and output_endian (each only where the matching
 * feature macro is defined).  There is no default case: an id not listed
 * below changes nothing beyond the x0201_f reset.  The function does not
 * store enc itself; the caller in options() assigns output_encoding.
 */
1464 static void set_output_encoding(nkf_encoding *enc)
1465 {
1466     x0201_f = FALSE;
1467     switch (nkf_enc_to_index(enc)) {
1468     case CP50220:
1469         x0201_f = TRUE;
1470 #ifdef SHIFTJIS_CP932
1471         cp932inv_f = FALSE;
1472 #endif
1473 #ifdef UTF8_OUTPUT_ENABLE
1474         ms_ucs_map_f = UCS_MAP_CP932;
1475 #endif
1476         break;
1477     case CP50221:
1478 #ifdef SHIFTJIS_CP932
1479         cp932inv_f = FALSE;
1480 #endif
1481 #ifdef UTF8_OUTPUT_ENABLE
1482         ms_ucs_map_f = UCS_MAP_CP932;
1483 #endif
1484         break;
1485     case ISO_2022_JP_1:
1486 #ifdef X0212_ENABLE
1487         x0212_f = TRUE;
1488 #endif
1489 #ifdef SHIFTJIS_CP932
1490         cp932inv_f = FALSE;
1491 #endif
1492         break;
1493     case ISO_2022_JP_3:
1494 #ifdef X0212_ENABLE
1495         x0212_f = TRUE;
1496 #endif
1497         x0213_f = TRUE;
1498 #ifdef SHIFTJIS_CP932
1499         cp932inv_f = FALSE;
1500 #endif
1501         break;
1502     case WINDOWS_31J:
1503 #ifdef UTF8_OUTPUT_ENABLE
1504         ms_ucs_map_f = UCS_MAP_CP932;
1505 #endif
1506         break;
1507     case CP10001:
1508 #ifdef UTF8_OUTPUT_ENABLE
1509         ms_ucs_map_f = UCS_MAP_CP10001;
1510 #endif
1511         break;
1512     case CP51932:
1513 #ifdef SHIFTJIS_CP932
1514         cp932inv_f = FALSE;
1515 #endif
1516 #ifdef UTF8_OUTPUT_ENABLE
1517         ms_ucs_map_f = UCS_MAP_CP932;
1518 #endif
1519         break;
1520     case EUCJP_MS:
1521 #ifdef X0212_ENABLE
1522         x0212_f = TRUE;
1523 #endif
1524 #ifdef UTF8_OUTPUT_ENABLE
1525         ms_ucs_map_f = UCS_MAP_MS;
1526 #endif
1527         break;
1528     case EUCJP_ASCII:
1529 #ifdef X0212_ENABLE
1530         x0212_f = TRUE;
1531 #endif
1532 #ifdef UTF8_OUTPUT_ENABLE
1533         ms_ucs_map_f = UCS_MAP_ASCII;
1534 #endif
1535         break;
1536     case SHIFT_JISX0213:
1537     case SHIFT_JIS_2004:
1538         x0213_f = TRUE;
1539 #ifdef SHIFTJIS_CP932
1540         cp932inv_f = FALSE;
1541 #endif
1542         break;
1543     case EUC_JISX0213:
1544     case EUC_JIS_2004:
1545 #ifdef X0212_ENABLE
1546         x0212_f = TRUE;
1547 #endif
1548         x0213_f = TRUE;
1549 #ifdef SHIFTJIS_CP932
1550         cp932inv_f = FALSE;
1551 #endif
1552         break;
1553 #ifdef UTF8_OUTPUT_ENABLE
/* Unicode outputs: the cases below only touch the BOM flag and, for the
 * LE variants, the endianness; ids with no case here keep both as-is. */
1554     case UTF_8_BOM:
1555         output_bom_f = TRUE;
1556         break;
1557     case UTF_16:
1558     case UTF_16BE_BOM:
1559         output_bom_f = TRUE;
1560         break;
1561     case UTF_16LE:
1562         output_endian = ENDIAN_LITTLE;
1563         output_bom_f = FALSE;
1564         break;
1565     case UTF_16LE_BOM:
1566         output_endian = ENDIAN_LITTLE;
1567         output_bom_f = TRUE;
1568         break;
1569     case UTF_32BE_BOM:
1570         output_bom_f = TRUE;
1571         break;
1572     case UTF_32LE:
1573         output_endian = ENDIAN_LITTLE;
1574         output_bom_f = FALSE;
1575         break;
1576     case UTF_32LE_BOM:
1577         output_endian = ENDIAN_LITTLE;
1578         output_bom_f = TRUE;
1579         break;
1580 #endif
1581     }
1582 }
1583
/* Set to 1 by options() when it meets a bare "--"; once it is 1, options()
 * returns immediately without parsing its argument. */
1584 static int option_mode = 0;
1585
1586 void options(unsigned char *cp)
1587 {
1588     nkf_char i, j;
1589     unsigned char *p;
1590     unsigned char *cp_back = NULL;
1591     char codeset[32];
1592     nkf_encoding *enc;
1593
1594     if (option_mode==1)
1595         return;
1596     while(*cp && *cp++!='-');
1597     while (*cp || cp_back) {
1598         if(!*cp){
1599             cp = cp_back;
1600             cp_back = NULL;
1601             continue;
1602         }
1603         p = 0;
1604         switch (*cp++) {
1605         case '-':  /* literal options */
1606             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1607                 option_mode = 1;
1608                 return;
1609             }
1610             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1611                 p = (unsigned char *)long_option[i].name;
1612                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1613                 if (*p == cp[j] || cp[j] == SP){
1614                     p = &cp[j] + 1;
1615                     break;
1616                 }
1617                 p = 0;
1618             }
1619             if (p == 0) {
1620                 fprintf(stderr, "unknown long option: --%s\n", cp);
1621                 return;
1622             }
1623             while(*cp && *cp != SP && cp++);
1624             if (long_option[i].alias[0]){
1625                 cp_back = cp;
1626                 cp = (unsigned char *)long_option[i].alias;
1627             }else{
1628                 if (strcmp(long_option[i].name, "ic=") == 0){
1629                     nkf_str_upcase((char *)p, codeset, 32);
1630                     enc = nkf_enc_find(codeset);
1631                     if (!enc) continue;
1632                     input_encoding = enc;
1633                     set_input_encoding(enc);
1634                     continue;
1635                 }
1636                 if (strcmp(long_option[i].name, "oc=") == 0){
1637                     nkf_str_upcase((char *)p, codeset, 32);
1638                     enc = nkf_enc_find(codeset);
1639                     if (enc <= 0) continue;
1640                     output_encoding = enc;
1641                     set_output_encoding(output_encoding);
1642                     continue;
1643                 }
1644                 if (strcmp(long_option[i].name, "guess=") == 0){
1645                     if (p[0] == '0' || p[0] == '1') {
1646                         guess_f = 1;
1647                     } else {
1648                         guess_f = 2;
1649                     }
1650                     continue;
1651                 }
1652 #ifdef OVERWRITE
1653                 if (strcmp(long_option[i].name, "overwrite") == 0){
1654                     file_out_f = TRUE;
1655                     overwrite_f = TRUE;
1656                     preserve_time_f = TRUE;
1657                     continue;
1658                 }
1659                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1660                     file_out_f = TRUE;
1661                     overwrite_f = TRUE;
1662                     preserve_time_f = TRUE;
1663                     backup_f = TRUE;
1664                     backup_suffix = malloc(strlen((char *) p) + 1);
1665                     strcpy(backup_suffix, (char *) p);
1666                     continue;
1667                 }
1668                 if (strcmp(long_option[i].name, "in-place") == 0){
1669                     file_out_f = TRUE;
1670                     overwrite_f = TRUE;
1671                     preserve_time_f = FALSE;
1672                     continue;
1673                 }
1674                 if (strcmp(long_option[i].name, "in-place=") == 0){
1675                     file_out_f = TRUE;
1676                     overwrite_f = TRUE;
1677                     preserve_time_f = FALSE;
1678                     backup_f = TRUE;
1679                     backup_suffix = malloc(strlen((char *) p) + 1);
1680                     strcpy(backup_suffix, (char *) p);
1681                     continue;
1682                 }
1683 #endif
1684 #ifdef INPUT_OPTION
1685                 if (strcmp(long_option[i].name, "cap-input") == 0){
1686                     cap_f = TRUE;
1687                     continue;
1688                 }
1689                 if (strcmp(long_option[i].name, "url-input") == 0){
1690                     url_f = TRUE;
1691                     continue;
1692                 }
1693 #endif
1694 #ifdef NUMCHAR_OPTION
1695                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1696                     numchar_f = TRUE;
1697                     continue;
1698                 }
1699 #endif
1700 #ifdef CHECK_OPTION
1701                 if (strcmp(long_option[i].name, "no-output") == 0){
1702                     noout_f = TRUE;
1703                     continue;
1704                 }
1705                 if (strcmp(long_option[i].name, "debug") == 0){
1706                     debug_f = TRUE;
1707                     continue;
1708                 }
1709 #endif
1710                 if (strcmp(long_option[i].name, "cp932") == 0){
1711 #ifdef SHIFTJIS_CP932
1712                     cp51932_f = TRUE;
1713                     cp932inv_f = TRUE;
1714 #endif
1715 #ifdef UTF8_OUTPUT_ENABLE
1716                     ms_ucs_map_f = UCS_MAP_CP932;
1717 #endif
1718                     continue;
1719                 }
1720                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1721 #ifdef SHIFTJIS_CP932
1722                     cp51932_f = FALSE;
1723                     cp932inv_f = FALSE;
1724 #endif
1725 #ifdef UTF8_OUTPUT_ENABLE
1726                     ms_ucs_map_f = UCS_MAP_ASCII;
1727 #endif
1728                     continue;
1729                 }
1730 #ifdef SHIFTJIS_CP932
1731                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1732                     cp932inv_f = TRUE;
1733                     continue;
1734                 }
1735 #endif
1736
1737 #ifdef X0212_ENABLE
1738                 if (strcmp(long_option[i].name, "x0212") == 0){
1739                     x0212_f = TRUE;
1740                     continue;
1741                 }
1742 #endif
1743
1744 #ifdef EXEC_IO
1745                   if (strcmp(long_option[i].name, "exec-in") == 0){
1746                       exec_f = 1;
1747                       return;
1748                   }
1749                   if (strcmp(long_option[i].name, "exec-out") == 0){
1750                       exec_f = -1;
1751                       return;
1752                   }
1753 #endif
1754 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1755                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1756                     no_cp932ext_f = TRUE;
1757                     continue;
1758                 }
1759                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1760                     no_best_fit_chars_f = TRUE;
1761                     continue;
1762                 }
1763                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1764                     encode_fallback = NULL;
1765                     continue;
1766                 }
1767                 if (strcmp(long_option[i].name, "fb-html") == 0){
1768                     encode_fallback = encode_fallback_html;
1769                     continue;
1770                 }
1771                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1772                     encode_fallback = encode_fallback_xml;
1773                     continue;
1774                 }
1775                 if (strcmp(long_option[i].name, "fb-java") == 0){
1776                     encode_fallback = encode_fallback_java;
1777                     continue;
1778                 }
1779                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1780                     encode_fallback = encode_fallback_perl;
1781                     continue;
1782                 }
1783                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1784                     encode_fallback = encode_fallback_subchar;
1785                     continue;
1786                 }
1787                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1788                     encode_fallback = encode_fallback_subchar;
1789                     unicode_subchar = 0;
1790                     if (p[0] != '0'){
1791                         /* decimal number */
1792                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1793                             unicode_subchar *= 10;
1794                             unicode_subchar += hex2bin(p[i]);
1795                         }
1796                     }else if(p[1] == 'x' || p[1] == 'X'){
1797                         /* hexadecimal number */
1798                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1799                             unicode_subchar <<= 4;
1800                             unicode_subchar |= hex2bin(p[i]);
1801                         }
1802                     }else{
1803                         /* octal number */
1804                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1805                             unicode_subchar *= 8;
1806                             unicode_subchar += hex2bin(p[i]);
1807                         }
1808                     }
1809                     w16e_conv(unicode_subchar, &i, &j);
1810                     unicode_subchar = i<<8 | j;
1811                     continue;
1812                 }
1813 #endif
1814 #ifdef UTF8_OUTPUT_ENABLE
1815                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1816                     ms_ucs_map_f = UCS_MAP_MS;
1817                     continue;
1818                 }
1819 #endif
1820 #ifdef UNICODE_NORMALIZATION
1821                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1822                     nfc_f = TRUE;
1823                     continue;
1824                 }
1825 #endif
1826                 if (strcmp(long_option[i].name, "prefix=") == 0){
1827                     if (nkf_isgraph(p[0])){
1828                         for (i = 1; nkf_isgraph(p[i]); i++){
1829                             prefix_table[p[i]] = p[0];
1830                         }
1831                     }
1832                     continue;
1833                 }
1834             }
1835             continue;
1836         case 'b':           /* buffered mode */
1837             unbuf_f = FALSE;
1838             continue;
1839         case 'u':           /* non bufferd mode */
1840             unbuf_f = TRUE;
1841             continue;
1842         case 't':           /* transparent mode */
1843             if (*cp=='1') {
1844                 /* alias of -t */
1845                 cp++;
1846                 nop_f = TRUE;
1847             } else if (*cp=='2') {
1848                 /*
1849                  * -t with put/get
1850                  *
1851                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1852                  *
1853                  */
1854                 cp++;
1855                 nop_f = 2;
1856             } else
1857                 nop_f = TRUE;
1858             continue;
1859         case 'j':           /* JIS output */
1860         case 'n':
1861             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1862             continue;
1863         case 'e':           /* AT&T EUC output */
1864             cp932inv_f = FALSE;
1865             output_encoding = nkf_enc_from_index(EUC_JP);
1866             continue;
1867         case 's':           /* SJIS output */
1868             output_encoding = nkf_enc_from_index(WINDOWS_31J);
1869             continue;
1870         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1871             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1872             input_encoding = nkf_enc_from_index(ISO_8859_1);
1873             continue;
1874         case 'i':           /* Kanji IN ESC-$-@/B */
1875             if (*cp=='@'||*cp=='B')
1876                 kanji_intro = *cp++;
1877             continue;
1878         case 'o':           /* ASCII IN ESC-(-J/B */
1879             if (*cp=='J'||*cp=='B'||*cp=='H')
1880                 ascii_intro = *cp++;
1881             continue;
1882         case 'h':
1883             /*
1884                 bit:1   katakana->hiragana
1885                 bit:2   hiragana->katakana
1886             */
1887             if ('9'>= *cp && *cp>='0')
1888                 hira_f |= (*cp++ -'0');
1889             else
1890                 hira_f |= 1;
1891             continue;
1892         case 'r':
1893             rot_f = TRUE;
1894             continue;
1895 #if defined(MSDOS) || defined(__OS2__)
1896         case 'T':
1897             binmode_f = FALSE;
1898             continue;
1899 #endif
1900 #ifndef PERL_XS
1901         case 'V':
1902             show_configuration();
1903             exit(1);
1904             break;
1905         case 'v':
1906             usage();
1907             exit(1);
1908             break;
1909 #endif
1910 #ifdef UTF8_OUTPUT_ENABLE
1911         case 'w':           /* UTF-8 output */
1912             if (cp[0] == '8') {
1913                 cp++;
1914                 if (cp[0] == '0'){
1915                     cp++;
1916                     output_encoding = nkf_enc_from_index(UTF_8N);
1917                 } else {
1918                     output_bom_f = TRUE;
1919                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1920                 }
1921             } else {
1922                 int enc_idx;
1923                 if ('1'== cp[0] && '6'==cp[1]) {
1924                     cp += 2;
1925                     enc_idx = UTF_16;
1926                 } else if ('3'== cp[0] && '2'==cp[1]) {
1927                     cp += 2;
1928                     enc_idx = UTF_32;
1929                 } else {
1930                     output_encoding = nkf_enc_from_index(UTF_8);
1931                     continue;
1932                 }
1933                 if (cp[0]=='L') {
1934                     cp++;
1935                     output_endian = ENDIAN_LITTLE;
1936                 } else if (cp[0] == 'B') {
1937                     cp++;
1938                 } else {
1939                     output_encoding = nkf_enc_from_index(enc_idx);
1940                     continue;
1941                 }
1942                 if (cp[0] == '0'){
1943                     cp++;
1944                     enc_idx = enc_idx == UTF_16
1945                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1946                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1947                 } else {
1948                     output_bom_f = TRUE;
1949                     enc_idx = enc_idx == UTF_16
1950                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1951                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1952                 }
1953                 output_encoding = nkf_enc_from_index(enc_idx);
1954             }
1955             continue;
1956 #endif
1957 #ifdef UTF8_INPUT_ENABLE
1958         case 'W':           /* UTF input */
1959             if (cp[0] == '8') {
1960                 cp++;
1961                 input_encoding = nkf_enc_from_index(UTF_8);
1962             }else{
1963                 int enc_idx;
1964                 if ('1'== cp[0] && '6'==cp[1]) {
1965                     cp += 2;
1966                     input_endian = ENDIAN_BIG;
1967                     enc_idx = UTF_16;
1968                 } else if ('3'== cp[0] && '2'==cp[1]) {
1969                     cp += 2;
1970                     input_endian = ENDIAN_BIG;
1971                     enc_idx = UTF_32;
1972                 } else {
1973                     input_encoding = nkf_enc_from_index(UTF_8);
1974                     continue;
1975                 }
1976                 if (cp[0]=='L') {
1977                     cp++;
1978                     input_endian = ENDIAN_LITTLE;
1979                 } else if (cp[0] == 'B') {
1980                     cp++;
1981                     input_endian = ENDIAN_BIG;
1982                 }
1983                 enc_idx = enc_idx == UTF_16
1984                     ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1985                     : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1986                 input_encoding = nkf_enc_from_index(enc_idx);
1987             }
1988             continue;
1989 #endif
1990         /* Input code assumption */
1991         case 'J':   /* ISO-2022-JP input */
1992             input_encoding = nkf_enc_from_index(ISO_2022_JP);
1993             continue;
1994         case 'E':   /* EUC-JP input */
1995             input_encoding = nkf_enc_from_index(EUC_JP);
1996             continue;
1997         case 'S':   /* Windows-31J input */
1998             input_encoding = nkf_enc_from_index(WINDOWS_31J);
1999             continue;
2000         case 'Z':   /* Convert X0208 alphabet to asii */
2001             /* alpha_f
2002                bit:0   Convert JIS X 0208 Alphabet to ASCII
2003                bit:1   Convert Kankaku to one space
2004                bit:2   Convert Kankaku to two spaces
2005                bit:3   Convert HTML Entity
2006                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2007             */
2008             while ('0'<= *cp && *cp <='9') {
2009                 alpha_f |= 1 << (*cp++ - '0');
2010             }
2011             if (!alpha_f) alpha_f = 1;
2012             continue;
2013         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2014             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2015             /* accept  X0201
2016                     ESC-(-I     in JIS, EUC, MS Kanji
2017                     SI/SO       in JIS, EUC, MS Kanji
2018                     SSO         in EUC, JIS, not in MS Kanji
2019                     MS Kanji (0xa0-0xdf)
2020                output  X0201
2021                     ESC-(-I     in JIS (0x20-0x5f)
2022                     SSO         in EUC (0xa0-0xdf)
2023                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2024             */
2025             continue;
2026         case 'X':   /* Convert X0201 kana to X0208 */
2027             x0201_f = TRUE;
2028             continue;
2029         case 'F':   /* prserve new lines */
2030             fold_preserve_f = TRUE;
2031         case 'f':   /* folding -f60 or -f */
2032             fold_f = TRUE;
2033             fold_len = 0;
2034             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2035                 fold_len *= 10;
2036                 fold_len += *cp++ - '0';
2037             }
2038             if (!(0<fold_len && fold_len<BUFSIZ))
2039                 fold_len = DEFAULT_FOLD;
2040             if (*cp=='-') {
2041                 fold_margin = 0;
2042                 cp++;
2043                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2044                     fold_margin *= 10;
2045                     fold_margin += *cp++ - '0';
2046                 }
2047             }
2048             continue;
2049         case 'm':   /* MIME support */
2050             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2051             if (*cp=='B'||*cp=='Q') {
2052                 mime_decode_mode = *cp++;
2053                 mimebuf_f = FIXED_MIME;
2054             } else if (*cp=='N') {
2055                 mime_f = TRUE; cp++;
2056             } else if (*cp=='S') {
2057                 mime_f = STRICT_MIME; cp++;
2058             } else if (*cp=='0') {
2059                 mime_decode_f = FALSE;
2060                 mime_f = FALSE; cp++;
2061             } else {
2062                 mime_f = STRICT_MIME;
2063             }
2064             continue;
2065         case 'M':   /* MIME output */
2066             if (*cp=='B') {
2067                 mimeout_mode = 'B';
2068                 mimeout_f = FIXED_MIME; cp++;
2069             } else if (*cp=='Q') {
2070                 mimeout_mode = 'Q';
2071                 mimeout_f = FIXED_MIME; cp++;
2072             } else {
2073                 mimeout_f = TRUE;
2074             }
2075             continue;
2076         case 'B':   /* Broken JIS support */
2077             /*  bit:0   no ESC JIS
2078                 bit:1   allow any x on ESC-(-x or ESC-$-x
2079                 bit:2   reset to ascii on NL
2080             */
2081             if ('9'>= *cp && *cp>='0')
2082                 broken_f |= 1<<(*cp++ -'0');
2083             else
2084                 broken_f |= TRUE;
2085             continue;
2086 #ifndef PERL_XS
2087         case 'O':/* for Output file */
2088             file_out_f = TRUE;
2089             continue;
2090 #endif
2091         case 'c':/* add cr code */
2092             nlmode_f = CRLF;
2093             continue;
2094         case 'd':/* delete cr code */
2095             nlmode_f = LF;
2096             continue;
2097         case 'I':   /* ISO-2022-JP output */
2098             iso2022jp_f = TRUE;
2099             continue;
2100         case 'L':  /* line mode */
2101             if (*cp=='u') {         /* unix */
2102                 nlmode_f = LF; cp++;
2103             } else if (*cp=='m') { /* mac */
2104                 nlmode_f = CR; cp++;
2105             } else if (*cp=='w') { /* windows */
2106                 nlmode_f = CRLF; cp++;
2107             } else if (*cp=='0') { /* no conversion  */
2108                 nlmode_f = 0; cp++;
2109             }
2110             continue;
2111 #ifndef PERL_XS
2112         case 'g':
2113             if ('2' <= *cp && *cp <= '9') {
2114                 guess_f = 2;
2115                 cp++;
2116             } else if (*cp == '0' || *cp == '1') {
2117                 guess_f = 1;
2118                 cp++;
2119             } else {
2120                 guess_f = 1;
2121             }
2122             continue;
2123 #endif
2124         case SP:
2125         /* module muliple options in a string are allowed for Perl moudle  */
2126             while(*cp && *cp++!='-');
2127             continue;
2128         default:
2129             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2130             /* bogus option but ignored */
2131             continue;
2132         }
2133     }
2134 }
2135
2136 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2137 {
2138     if (iconv_func){
2139         struct input_code *p = input_code_list;
2140         while (p->name){
2141             if (iconv_func == p->iconv_func){
2142                 return p;
2143             }
2144             p++;
2145         }
2146     }
2147     return 0;
2148 }
2149
2150 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2151 {
2152 #ifdef INPUT_CODE_FIX
2153     if (f || !input_encoding)
2154 #endif
2155         if (estab_f != f){
2156             estab_f = f;
2157         }
2158
2159     if (iconv_func
2160 #ifdef INPUT_CODE_FIX
2161         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
2162 #endif
2163         ){
2164         iconv = iconv_func;
2165     }
2166 #ifdef CHECK_OPTION
2167     if (estab_f && iconv_for_check != iconv){
2168         struct input_code *p = find_inputcode_byfunc(iconv);
2169         if (p){
2170             set_input_codename(p->name);
2171             debug(p->name);
2172         }
2173         iconv_for_check = iconv;
2174     }
2175 #endif
2176 }
2177
/*
 * Flag bits that set_code_score() ORs into input_code.score.  Each macro
 * is the previous one shifted left by one, so every flag is a distinct bit.
 */
#define SCORE_L2       (1)                   /* JIS level 2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* machine-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* non-existent characters */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1) /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score flags indexed by the low four bits of the first byte; code_score()
 * uses this table when (c2 & 0x70) == 0x20.
 *
 * The element type is unsigned char because SCORE_ERROR is 0x80, which
 * does not fit in a plain char where char is signed: it would read back
 * negative and sign-extend when OR-ed into the score.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score flags indexed by the low four bits of the first byte; code_score()
 * uses this table when (c2 & 0x70) == 0x70.  Unsigned for the same reason
 * as score_table_A0: the last entry is SCORE_ERROR (0x80).
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2202
2203 void set_code_score(struct input_code *ptr, nkf_char score)
2204 {
2205     if (ptr){
2206         ptr->score |= score;
2207     }
2208 }
2209
2210 void clr_code_score(struct input_code *ptr, nkf_char score)
2211 {
2212     if (ptr){
2213         ptr->score &= ~score;
2214     }
2215 }
2216
2217 void code_score(struct input_code *ptr)
2218 {
2219     nkf_char c2 = ptr->buf[0];
2220 #ifdef UTF8_OUTPUT_ENABLE
2221     nkf_char c1 = ptr->buf[1];
2222 #endif
2223     if (c2 < 0){
2224         set_code_score(ptr, SCORE_ERROR);
2225     }else if (c2 == SSO){
2226         set_code_score(ptr, SCORE_KANA);
2227     }else if (c2 == 0x8f){
2228         set_code_score(ptr, SCORE_X0212);
2229 #ifdef UTF8_OUTPUT_ENABLE
2230     }else if (!e2w_conv(c2, c1)){
2231         set_code_score(ptr, SCORE_NO_EXIST);
2232 #endif
2233     }else if ((c2 & 0x70) == 0x20){
2234         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2235     }else if ((c2 & 0x70) == 0x70){
2236         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2237     }else if ((c2 & 0x70) >= 0x50){
2238         set_code_score(ptr, SCORE_L2);
2239     }
2240 }
2241
2242 void status_disable(struct input_code *ptr)
2243 {
2244     ptr->stat = -1;
2245     ptr->buf[0] = -1;
2246     code_score(ptr);
2247     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2248 }
2249
2250 void status_push_ch(struct input_code *ptr, nkf_char c)
2251 {
2252     ptr->buf[ptr->index++] = c;
2253 }
2254
2255 void status_clear(struct input_code *ptr)
2256 {
2257     ptr->stat = 0;
2258     ptr->index = 0;
2259 }
2260
2261 void status_reset(struct input_code *ptr)
2262 {
2263     status_clear(ptr);
2264     ptr->score = SCORE_INIT;
2265 }
2266
2267 void status_reinit(struct input_code *ptr)
2268 {
2269     status_reset(ptr);
2270     ptr->_file_stat = 0;
2271 }
2272
2273 void status_check(struct input_code *ptr, nkf_char c)
2274 {
2275     if (c <= DEL && estab_f){
2276         status_reset(ptr);
2277     }
2278 }
2279
2280 void s_status(struct input_code *ptr, nkf_char c)
2281 {
2282     switch(ptr->stat){
2283       case -1:
2284           status_check(ptr, c);
2285           break;
2286       case 0:
2287           if (c <= DEL){
2288               break;
2289 #ifdef NUMCHAR_OPTION
2290           }else if (is_unicode_capsule(c)){
2291               break;
2292 #endif
2293           }else if (0xa1 <= c && c <= 0xdf){
2294               status_push_ch(ptr, SSO);
2295               status_push_ch(ptr, c);
2296               code_score(ptr);
2297               status_clear(ptr);
2298           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2299               ptr->stat = 1;
2300               status_push_ch(ptr, c);
2301           }else if (0xed <= c && c <= 0xee){
2302               ptr->stat = 3;
2303               status_push_ch(ptr, c);
2304 #ifdef SHIFTJIS_CP932
2305           }else if (is_ibmext_in_sjis(c)){
2306               ptr->stat = 2;
2307               status_push_ch(ptr, c);
2308 #endif /* SHIFTJIS_CP932 */
2309 #ifdef X0212_ENABLE
2310           }else if (0xf0 <= c && c <= 0xfc){
2311               ptr->stat = 1;
2312               status_push_ch(ptr, c);
2313 #endif /* X0212_ENABLE */
2314           }else{
2315               status_disable(ptr);
2316           }
2317           break;
2318       case 1:
2319           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2320               status_push_ch(ptr, c);
2321               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2322               code_score(ptr);
2323               status_clear(ptr);
2324           }else{
2325               status_disable(ptr);
2326           }
2327           break;
2328       case 2:
2329 #ifdef SHIFTJIS_CP932
2330         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2331             status_push_ch(ptr, c);
2332             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2333                 set_code_score(ptr, SCORE_CP932);
2334                 status_clear(ptr);
2335                 break;
2336             }
2337         }
2338 #endif /* SHIFTJIS_CP932 */
2339         status_disable(ptr);
2340           break;
2341       case 3:
2342           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2343               status_push_ch(ptr, c);
2344               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2345             set_code_score(ptr, SCORE_CP932);
2346             status_clear(ptr);
2347           }else{
2348               status_disable(ptr);
2349           }
2350           break;
2351     }
2352 }
2353
2354 void e_status(struct input_code *ptr, nkf_char c)
2355 {
2356     switch (ptr->stat){
2357       case -1:
2358           status_check(ptr, c);
2359           break;
2360       case 0:
2361           if (c <= DEL){
2362               break;
2363 #ifdef NUMCHAR_OPTION
2364           }else if (is_unicode_capsule(c)){
2365               break;
2366 #endif
2367           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2368               ptr->stat = 1;
2369               status_push_ch(ptr, c);
2370 #ifdef X0212_ENABLE
2371           }else if (0x8f == c){
2372               ptr->stat = 2;
2373               status_push_ch(ptr, c);
2374 #endif /* X0212_ENABLE */
2375           }else{
2376               status_disable(ptr);
2377           }
2378           break;
2379       case 1:
2380           if (0xa1 <= c && c <= 0xfe){
2381               status_push_ch(ptr, c);
2382               code_score(ptr);
2383               status_clear(ptr);
2384           }else{
2385               status_disable(ptr);
2386           }
2387           break;
2388 #ifdef X0212_ENABLE
2389       case 2:
2390           if (0xa1 <= c && c <= 0xfe){
2391               ptr->stat = 1;
2392               status_push_ch(ptr, c);
2393           }else{
2394               status_disable(ptr);
2395           }
2396 #endif /* X0212_ENABLE */
2397     }
2398 }
2399
#ifdef UTF8_INPUT_ENABLE
/*
 * State machine fed one input byte at a time for UTF-8 detection.
 *
 * ptr->stat values:
 *   -1   disabled; status_check() decides whether to reset
 *    0   waiting for a lead byte
 *    1,2 lead byte 0xc0-0xdf / 0xe0-0xef stored; stat is also the number
 *        of continuation bytes needed before the character is converted
 *    3   lead byte 0xf0-0xf4 stored; continuation bytes are consumed and
 *        the state is then cleared without scoring
 * A byte that fits no rule disables the detector.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    /* continuation-byte range 0x80-0xbf */
    const int is_cont = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
      case -1:
        status_check(ptr, c);
        break;

      case 0:
        if (c <= DEL){
            break;
        }
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c)){
            break;
        }
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
            status_push_ch(ptr, c);
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
            status_push_ch(ptr, c);
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
            status_push_ch(ptr, c);
        }else{
            status_disable(ptr);
        }
        break;

      case 1:
      case 2:
        if (!is_cont){
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index > ptr->stat){
            /* sequence complete; EF BB BF is the byte order mark and is not scored */
            const int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                                && ptr->buf[2] == 0xbf);
            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!is_bom){
                code_score(ptr);
            }
            status_clear(ptr);
        }
        break;

      case 3:
        if (!is_cont){
            status_disable(ptr);
        }else if (ptr->index < ptr->stat){
            status_push_ch(ptr, c);
        }else{
            status_clear(ptr);
        }
        break;
    }
}
#endif
2459
2460 void code_status(nkf_char c)
2461 {
2462     int action_flag = 1;
2463     struct input_code *result = 0;
2464     struct input_code *p = input_code_list;
2465     while (p->name){
2466         if (!p->status_func) {
2467             ++p;
2468             continue;
2469         }
2470         if (!p->status_func)
2471             continue;
2472         (p->status_func)(p, c);
2473         if (p->stat > 0){
2474             action_flag = 0;
2475         }else if(p->stat == 0){
2476             if (result){
2477                 action_flag = 0;
2478             }else{
2479                 result = p;
2480             }
2481         }
2482         ++p;
2483     }
2484
2485     if (action_flag){
2486         if (result && !estab_f){
2487             set_iconv(TRUE, result->iconv_func);
2488         }else if (c <= DEL){
2489             struct input_code *ptr = input_code_list;
2490             while (ptr->name){
2491                 status_reset(ptr);
2492                 ++ptr;
2493             }
2494         }
2495     }
2496 }
2497
2498 #ifndef WIN32DLL
2499 nkf_char std_getc(FILE *f)
2500 {
2501     if (std_gc_ndx){
2502         return std_gc_buf[--std_gc_ndx];
2503     }
2504     return getc(f);
2505 }
2506 #endif /*WIN32DLL*/
2507
2508 nkf_char std_ungetc(nkf_char c, FILE *f)
2509 {
2510     if (std_gc_ndx == STD_GC_BUFSIZE){
2511         return EOF;
2512     }
2513     std_gc_buf[std_gc_ndx++] = c;
2514     return c;
2515 }
2516
2517 #ifndef WIN32DLL
2518 void std_putc(nkf_char c)
2519 {
2520     if(c!=EOF)
2521       putchar(c);
2522 }
2523 #endif /*WIN32DLL*/
2524
2525 #if !defined(PERL_XS) && !defined(WIN32DLL)
2526 nkf_char noconvert(FILE *f)
2527 {
2528     nkf_char    c;
2529
2530     if (nop_f == 2)
2531         module_connection();
2532     while ((c = (*i_getc)(f)) != EOF)
2533       (*o_putc)(c);
2534     (*o_putc)(EOF);
2535     return 1;
2536 }
2537 #endif
2538
2539 void module_connection(void)
2540 {
2541     if (!output_encoding) {
2542         output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
2543         set_output_encoding(output_encoding);
2544     }
2545     oconv = nkf_enc_to_oconv(output_encoding);
2546     o_putc = std_putc;
2547
2548     /* replace continucation module, from output side */
2549
2550     /* output redicrection */
2551 #ifdef CHECK_OPTION
2552     if (noout_f || guess_f){
2553         o_putc = no_putc;
2554     }
2555 #endif
2556     if (mimeout_f) {
2557         o_mputc = o_putc;
2558         o_putc = mime_putc;
2559         if (mimeout_f == TRUE) {
2560             o_base64conv = oconv; oconv = base64_conv;
2561         }
2562         /* base64_count = 0; */
2563     }
2564
2565     if (nlmode_f || guess_f) {
2566         o_nlconv = oconv; oconv = nl_conv;
2567     }
2568     if (rot_f) {
2569         o_rot_conv = oconv; oconv = rot_conv;
2570     }
2571     if (iso2022jp_f) {
2572         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2573     }
2574     if (hira_f) {
2575         o_hira_conv = oconv; oconv = hira_conv;
2576     }
2577     if (fold_f) {
2578         o_fconv = oconv; oconv = fold_conv;
2579         f_line = 0;
2580     }
2581     if (alpha_f || x0201_f) {
2582         o_zconv = oconv; oconv = z_conv;
2583     }
2584
2585     i_getc = std_getc;
2586     i_ungetc = std_ungetc;
2587     /* input redicrection */
2588 #ifdef INPUT_OPTION
2589     if (cap_f){
2590         i_cgetc = i_getc; i_getc = cap_getc;
2591         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2592     }
2593     if (url_f){
2594         i_ugetc = i_getc; i_getc = url_getc;
2595         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2596     }
2597 #endif
2598 #ifdef NUMCHAR_OPTION
2599     if (numchar_f){
2600         i_ngetc = i_getc; i_getc = numchar_getc;
2601         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2602     }
2603 #endif
2604 #ifdef UNICODE_NORMALIZATION
2605     if (nfc_f){
2606         i_nfc_getc = i_getc; i_getc = nfc_getc;
2607         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2608     }
2609 #endif
2610     if (mime_f && mimebuf_f==FIXED_MIME) {
2611         i_mgetc = i_getc; i_getc = mime_getc;
2612         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2613     }
2614     if (broken_f & 1) {
2615         i_bgetc = i_getc; i_getc = broken_getc;
2616         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2617     }
2618     if (input_encoding) {
2619         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
2620     } else {
2621         set_iconv(FALSE, e_iconv);
2622     }
2623
2624     {
2625         struct input_code *p = input_code_list;
2626         while (p->name){
2627             status_reinit(p++);
2628         }
2629     }
2630 }
2631
2632 /*
2633  * Check and Ignore BOM
2634  */
2635 void check_bom(FILE *f)
2636 {
2637     int c2;
2638     switch(c2 = (*i_getc)(f)){
2639     case 0x00:
2640         if((c2 = (*i_getc)(f)) == 0x00){
2641             if((c2 = (*i_getc)(f)) == 0xFE){
2642                 if((c2 = (*i_getc)(f)) == 0xFF){
2643                     if(!input_encoding){
2644                         set_iconv(TRUE, w_iconv32);
2645                     }
2646                     if (iconv == w_iconv32) {
2647                         input_endian = ENDIAN_BIG;
2648                         return;
2649                     }
2650                     (*i_ungetc)(0xFF,f);
2651                 }else (*i_ungetc)(c2,f);
2652                 (*i_ungetc)(0xFE,f);
2653             }else if(c2 == 0xFF){
2654                 if((c2 = (*i_getc)(f)) == 0xFE){
2655                     if(!input_encoding){
2656                         set_iconv(TRUE, w_iconv32);
2657                     }
2658                     if (iconv == w_iconv32) {
2659                         input_endian = ENDIAN_2143;
2660                         return;
2661                     }
2662                     (*i_ungetc)(0xFF,f);
2663                 }else (*i_ungetc)(c2,f);
2664                 (*i_ungetc)(0xFF,f);
2665             }else (*i_ungetc)(c2,f);
2666             (*i_ungetc)(0x00,f);
2667         }else (*i_ungetc)(c2,f);
2668         (*i_ungetc)(0x00,f);
2669         break;
2670     case 0xEF:
2671         if((c2 = (*i_getc)(f)) == 0xBB){
2672             if((c2 = (*i_getc)(f)) == 0xBF){
2673                 if(!input_encoding){
2674                     set_iconv(TRUE, w_iconv);
2675                 }
2676                 if (iconv == w_iconv) {
2677                     return;
2678                 }
2679                 (*i_ungetc)(0xBF,f);
2680             }else (*i_ungetc)(c2,f);
2681             (*i_ungetc)(0xBB,f);
2682         }else (*i_ungetc)(c2,f);
2683         (*i_ungetc)(0xEF,f);
2684         break;
2685     case 0xFE:
2686         if((c2 = (*i_getc)(f)) == 0xFF){
2687             if((c2 = (*i_getc)(f)) == 0x00){
2688                 if((c2 = (*i_getc)(f)) == 0x00){
2689                     if(!input_encoding){
2690                         set_iconv(TRUE, w_iconv32);
2691                     }
2692                     if (iconv == w_iconv32) {
2693                         input_endian = ENDIAN_3412;
2694                         return;
2695                     }
2696                     (*i_ungetc)(0x00,f);
2697                 }else (*i_ungetc)(c2,f);
2698                 (*i_ungetc)(0x00,f);
2699             }else (*i_ungetc)(c2,f);
2700             if(!input_encoding){
2701                 set_iconv(TRUE, w_iconv16);
2702             }
2703             if (iconv == w_iconv16) {
2704                 input_endian = ENDIAN_BIG;
2705                 return;
2706             }
2707             (*i_ungetc)(0xFF,f);
2708         }else (*i_ungetc)(c2,f);
2709         (*i_ungetc)(0xFE,f);
2710         break;
2711     case 0xFF:
2712         if((c2 = (*i_getc)(f)) == 0xFE){
2713             if((c2 = (*i_getc)(f)) == 0x00){
2714                 if((c2 = (*i_getc)(f)) == 0x00){
2715                     if(!input_encoding){
2716                         set_iconv(TRUE, w_iconv32);
2717                     }
2718                     if (iconv == w_iconv32) {
2719                         input_endian = ENDIAN_LITTLE;
2720                         return;
2721                     }
2722                     (*i_ungetc)(0x00,f);
2723                 }else (*i_ungetc)(c2,f);
2724                 (*i_ungetc)(0x00,f);
2725             }else (*i_ungetc)(c2,f);
2726             if(!input_encoding){
2727                 set_iconv(TRUE, w_iconv16);
2728             }
2729             if (iconv == w_iconv16) {
2730                 input_endian = ENDIAN_LITTLE;
2731                 return;
2732             }
2733             (*i_ungetc)(0xFE,f);
2734         }else (*i_ungetc)(c2,f);
2735         (*i_ungetc)(0xFF,f);
2736         break;
2737     default:
2738         (*i_ungetc)(c2,f);
2739         break;
2740     }
2741 }
2742
2743 /*
2744    Conversion main loop. Code detection only.

        kanji_convert() reads the stream f through (*i_getc) until EOF.
        For every character read it either
          - remembers it as a pending first byte in c2 and reads on (NEXT),
          - consumes an escape / shift / MIME introducer and reads on (NEXT),
          - or falls out of the if/else chain (SEND) to the switch on
            input_mode at the bottom of the loop body, which hands the
            unit (c2, c1, c0) to (*iconv) while input_mode is ASCII and
            directly to (*oconv) for the other input modes.
        While no encoding is established (estab_f is false) pairs of
        8-bit bytes are passed to h_conv().

        Returns 1 after the epilogue.  The CP932 UDC branch of the
        dispatch switch also contains a "return 0".
2745  */
2746
2747 nkf_char kanji_convert(FILE *f)
2748 {
2749     nkf_char    c3, c2=0, c1, c0=0;  /* c2: pending first byte (0 = none); c0: extra trailing byte(s) */
2750     int is_8bit = FALSE;
2751
2752     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
2753         is_8bit = TRUE;  /* an input encoding is set and nkf_enc_asciicompat() rejects it */
2754     }
2755
2756     input_mode = ASCII;
2757     output_mode = ASCII;
2758     shift_mode = FALSE;
2759
2760 #define NEXT continue      /* no output, get next */
2761 #define SEND ;             /* output c1 and c2, get next */
2762 #define LAST break         /* end of loop, go closing  */
         /* SEND expands to an empty statement: control simply leaves the
            if/else chain and reaches the switch(input_mode) dispatch at
            the bottom of the loop body. */
2763
2764     module_connection();
2765     check_bom(f);  /* may consume a byte order mark and select iconv / input_endian */
2766
2767     while ((c1 = (*i_getc)(f)) != EOF) {
2768 #ifdef INPUT_CODE_FIX
2769         if (!input_encoding)
2770 #endif
2771             code_status(c1);  /* with INPUT_CODE_FIX: only while no input_encoding is set */
2772         if (c2) {
2773             /* second byte */
                 /* the 8-bit threshold is 0x92 for CP5022x input encodings, DEL otherwise */
2774             if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2775                 /* in case of 8th bit is on */
2776                 if (!estab_f&&!mime_decode_mode) {
2777                     /* in case of not established yet */
2778                     /* It is still ambiguous */
2779                     if (h_conv(f, c2, c1)==EOF)
2780                         LAST;
2781                     else
2782                         c2 = 0;
2783                     NEXT;
2784                 } else {
2785                     /* in case of already established */
2786                     if (c1 < AT) {
2787                         /* ignore bogus code and not CP5022x UCD */
2788                         c2 = 0;
2789                         NEXT;
2790                     } else {
2791                         SEND;
2792                     }
2793                 }
2794             } else
2795                 /* second byte, 7 bit code */
2796                 /* it might be kanji shifted */
2797                 if ((c1 == DEL) || (c1 <= SP)) {
2798                     /* ignore bogus first code */
2799                     c2 = 0;
2800                     NEXT;
2801                 } else
2802                     SEND;
2803         } else {
2804             /* first byte */
2805 #ifdef UTF8_INPUT_ENABLE
2806             if (iconv == w_iconv16) {
                     /* UTF-16: assemble one 16-bit unit as (c2 = high byte, c1 = low byte).
                        When the high byte is 0xD8..0xDB a second 16-bit unit is read
                        into c0.  A truncated unit sets c2 = EOF, which is still passed
                        on to the dispatch below. */
2807                 if (input_endian == ENDIAN_BIG) {
2808                     c2 = c1;
2809                     if ((c1 = (*i_getc)(f)) != EOF) {
2810                         if (0xD8 <= c2 && c2 <= 0xDB) {
2811                             if ((c0 = (*i_getc)(f)) != EOF) {
2812                                 c0 <<= 8;
2813                                 if ((c3 = (*i_getc)(f)) != EOF) {
2814                                     c0 |= c3;
2815                                 } else c2 = EOF;
2816                             } else c2 = EOF;
2817                         }
2818                     } else c2 = EOF;
2819                 } else {
                         /* any other input_endian value: the byte already read is the low byte */
2820                     if ((c2 = (*i_getc)(f)) != EOF) {
2821                         if (0xD8 <= c2 && c2 <= 0xDB) {
2822                             if ((c3 = (*i_getc)(f)) != EOF) {
2823                                 if ((c0 = (*i_getc)(f)) != EOF) {
2824                                     c0 <<= 8;
2825                                     c0 |= c3;
2826                                 } else c2 = EOF;
2827                             } else c2 = EOF;
2828                         }
2829                     } else c2 = EOF;
2830                 }
2831                 SEND;
2832             } else if(iconv == w_iconv32){
2833                 int c3 = c1;  /* first byte of the 4-byte unit; shadows the outer c3 */
2834                 if((c2 = (*i_getc)(f)) != EOF &&
2835                    (c1 = (*i_getc)(f)) != EOF &&
2836                    (c0 = (*i_getc)(f)) != EOF){
                         /* bytes in stream order are c3, c2, c1, c0; each case below
                           assembles three of them into a 24-bit value in c1 */
2837                     switch(input_endian){
2838                     case ENDIAN_BIG:
2839                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2840                         break;
2841                     case ENDIAN_LITTLE:
2842                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2843                         break;
2844                     case ENDIAN_2143:
2845                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2846                         break;
2847                     case ENDIAN_3412:
2848                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2849                         break;
2850                     }
2851                     c2 = 0;
2852                 }else{
2853                     c2 = EOF;  /* truncated unit */
2854                 }
2855                 SEND;
2856             } else
2857 #endif
2858 #ifdef NUMCHAR_OPTION
2859             if (is_unicode_capsule(c1)){
2860                 SEND;
2861             } else
2862 #endif
2863             if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2864                 /* 8 bit code */
2865                 if (!estab_f && !iso8859_f) {
2866                     /* not established yet */
2867                     c2 = c1;
2868                     NEXT;
2869                 } else { /* estab_f==TRUE */
2870                     if (iso8859_f) {
2871                         c2 = ISO_8859_1;
2872                         c1 &= 0x7f;
2873                         SEND;
2874                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2875                         /* SJIS X0201 Case... */
2876                         if (iso2022jp_f && !x0201_f) {
2877                             (*oconv)(GETA1, GETA2);  /* emit the GETA1/GETA2 pair instead of the kana */
2878                             NEXT;
2879                         } else {
2880                             c2 = JIS_X_0201;
2881                             c1 &= 0x7f;
2882                             SEND;
2883                         }
2884                     } else if (c1==SSO && iconv != s_iconv) {
2885                         /* EUC X0201 Case */
2886                         c1 = (*i_getc)(f);  /* skip SSO */
2887                         code_status(c1);
2888                         if (SSP<=c1 && c1<0xe0) {
2889                             if (iso2022jp_f && !x0201_f) {
2890                                 (*oconv)(GETA1, GETA2);
2891                                 NEXT;
2892                             } else {
2893                                 c2 = JIS_X_0201;
2894                                 c1 &= 0x7f;
2895                                 SEND;
2896                             }
2897                         } else  { /* bogus code, skip SSO and one byte */
2898                             NEXT;
2899                         }
2900                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2901                                (c1 == 0xFD || c1 == 0xFE)) {
2902                         /* CP10001 */
2903                         c2 = JIS_X_0201;
2904                         c1 &= 0x7f;
2905                         SEND;
2906                     } else {
2907                        /* already established */
2908                        c2 = c1;
2909                        NEXT;
2910                     }
2911                 }
2912             } else if ((c1 > SP) && (c1 != DEL)) {
2913                 /* in case of Roman characters */
2914                 if (shift_mode) {
2915                     /* output 1 shifted byte */
2916                     if (iso8859_f) {
2917                         c2 = ISO_8859_1;
2918                         SEND;
2919                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2920                       /* output 1 shifted byte */
2921                         if (iso2022jp_f && !x0201_f) {
2922                             (*oconv)(GETA1, GETA2);
2923                             NEXT;
2924                         } else {
2925                             c2 = JIS_X_0201;
2926                             SEND;
2927                         }
2928                     } else {
2929                         /* look like bogus code */
2930                         NEXT;
2931                     }
2932                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2933                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2934                     /* in case of Kanji shifted */
2935                     c2 = c1;
2936                     NEXT;
2937                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2938                     /* Check MIME code */
2939                     if ((c1 = (*i_getc)(f)) == EOF) {
2940                         (*oconv)(0, '=');
2941                         LAST;
2942                     } else if (c1 == '?') {
2943                         /* =? is mime conversion start sequence */
2944                         if(mime_f == STRICT_MIME) {
2945                             /* check in real detail */
2946                             if (mime_begin_strict(f) == EOF)
2947                                 LAST;
2948                             else
2949                                 NEXT;
2950                         } else if (mime_begin(f) == EOF)
2951                             LAST;
2952                         else
2953                             NEXT;
2954                     } else {
                             /* '=' not followed by '?': emit '=' and re-read the follower */
2955                         (*oconv)(0, '=');
2956                         (*i_ungetc)(c1,f);
2957                         NEXT;
2958                     }
2959                 } else {
2960                     /* normal ASCII code */
2961                     SEND;
2962                 }
2963             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2964                 shift_mode = FALSE;
2965                 NEXT;
2966             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2967                 shift_mode = TRUE;
2968                 NEXT;
2969             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* escape sequence: ESC $ ..., ESC ( ..., ESC N / ESC n */
2970                 if ((c1 = (*i_getc)(f)) == EOF) {
2971                     /*  (*oconv)(0, ESC); don't send bogus code */
2972                     LAST;
2973                 } else if (c1 == '$') {
2974                     if ((c1 = (*i_getc)(f)) == EOF) {
2975                         /*
2976                         (*oconv)(0, ESC); don't send bogus code
2977                         (*oconv)(0, '$'); */
2978                         LAST;
2979                     } else if (c1 == '@'|| c1 == 'B') {
2980                         /* This is kanji introduction */
2981                         input_mode = JIS_X_0208;
2982                         shift_mode = FALSE;
2983                         set_input_codename("ISO-2022-JP");
2984 #ifdef CHECK_OPTION
2985                         debug("ISO-2022-JP");
2986 #endif
2987                         NEXT;
2988                     } else if (c1 == '(') {
2989                         if ((c1 = (*i_getc)(f)) == EOF) {
2990                             /* don't send bogus code
2991                             (*oconv)(0, ESC);
2992                             (*oconv)(0, '$');
2993                             (*oconv)(0, '(');
2994                                 */
2995                             LAST;
2996                         } else if (c1 == '@'|| c1 == 'B') {
2997                             /* This is kanji introduction */
2998                             input_mode = JIS_X_0208;
2999                             shift_mode = FALSE;
3000                             NEXT;
3001 #ifdef X0212_ENABLE
3002                         } else if (c1 == 'D'){
3003                             input_mode = JIS_X_0212;
3004                             shift_mode = FALSE;
3005                             NEXT;
3006 #endif /* X0212_ENABLE */
3007                         } else if (c1 == 0x4F){
3008                             input_mode = JIS_X_0213_1;
3009                             shift_mode = FALSE;
3010                             NEXT;
3011                         } else if (c1 == 0x50){
3012                             input_mode = JIS_X_0213_2;
3013                             shift_mode = FALSE;
3014                             NEXT;
3015                         } else {
3016                             /* could be some special code */
3017                             (*oconv)(0, ESC);
3018                             (*oconv)(0, '$');
3019                             (*oconv)(0, '(');
3020                             (*oconv)(0, c1);
3021                             NEXT;
3022                         }
3023                     } else if (broken_f&0x2) {
3024                         /* accept any ESC-(-x as broken code ... */
3025                         input_mode = JIS_X_0208;
3026                         shift_mode = FALSE;
3027                         NEXT;
3028                     } else {
                             /* unknown ESC $ x: pass the three characters through */
3029                         (*oconv)(0, ESC);
3030                         (*oconv)(0, '$');
3031                         (*oconv)(0, c1);
3032                         NEXT;
3033                     }
3034                 } else if (c1 == '(') {
3035                     if ((c1 = (*i_getc)(f)) == EOF) {
3036                         /* don't send bogus code
3037                         (*oconv)(0, ESC);
3038                         (*oconv)(0, '('); */
3039                         LAST;
3040                     } else {
3041                         if (c1 == 'I') {
3042                             /* This is X0201 kana introduction */
3043                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3044                             NEXT;
3045                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3046                             /* ESC ( B / J / H: switch input_mode back to ASCII */
3047                             input_mode = ASCII; shift_mode = FALSE;
3048                             NEXT;
3049                         } else if (broken_f&0x2) {
3050                             input_mode = ASCII; shift_mode = FALSE;
3051                             NEXT;
3052                         } else {
3053                             (*oconv)(0, ESC);
3054                             (*oconv)(0, '(');
3055                             /* maintain various input_mode here */
3056                             SEND;
3057                         }
3058                     }
3059                } else if ( c1 == 'N' || c1 == 'n'){
3060                    /* SS2 */
3061                    c3 = (*i_getc)(f);  /* skip SS2 */
3062                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3063                        c1 = c3;
3064                        c2 = JIS_X_0201;
3065                        SEND;
3066                    }else{
3067                        (*i_ungetc)(c3, f);
3068                        /* lonely ESC  */
3069                        (*oconv)(0, ESC);
3070                        SEND;
3071                    }
3072                 } else {
3073                     /* lonely ESC  */
3074                     (*oconv)(0, ESC);
3075                     SEND;
3076                 }
3077             } else if (c1 == ESC && iconv == s_iconv) {
3078                 /* ESC in Shift_JIS */
3079                 if ((c1 = (*i_getc)(f)) == EOF) {
3080                     /*  (*oconv)(0, ESC); don't send bogus code */
3081                     LAST;
3082                 } else if (c1 == '$') {
3083                     /* J-PHONE emoji */
3084                     if ((c1 = (*i_getc)(f)) == EOF) {
3085                         /*
3086                            (*oconv)(0, ESC); don't send bogus code
3087                            (*oconv)(0, '$'); */
3088                         LAST;
3089                     } else {
3090                         if (('E' <= c1 && c1 <= 'G') ||
3091                             ('O' <= c1 && c1 <= 'Q')) {
3092                             /*
3093                                NUM : 0 1 2 3 4 5
3094                                BYTE: G E F O P Q
3095                                C%7 : 1 6 0 2 3 4
3096                                C%7 : 0 1 2 3 4 5 6
3097                                NUM : 2 0 3 4 5 X 1
3098                              */
3099                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                                 /* c0 becomes the offset added to every following byte in SP..'z' */
3100                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3101                             while ((c1 = (*i_getc)(f)) != EOF) {
3102                                 if (SP <= c1 && c1 <= 'z') {
3103                                     (*oconv)(0, c1 + c0);
3104                                 } else break; /* c1 == SO */
3105                             }
3106                         }
3107                     }
3108                     if (c1 == EOF) LAST;
3109                     NEXT;
3110                 } else {
3111                     /* lonely ESC  */
3112                     (*oconv)(0, ESC);
3113                     SEND;
3114                 }
3115             } else if (c1 == LF || c1 == CR) {
3116                 if (broken_f&4) {
3117                     input_mode = ASCII; set_iconv(FALSE, 0);
3118                     SEND;
3119                 } else if (mime_decode_f && !mime_decode_mode){
                         /* look ahead: when the line break is followed by SP, push the
                            SP back and drop the line break (continue) */
3120                     if (c1 == LF) {
3121                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3122                             i_ungetc(SP,f);
3123                             continue;
3124                         } else {
3125                             i_ungetc(c1,f);
3126                         }
3127                         c1 = LF;
3128                         SEND;
3129                     } else  { /* if (c1 == CR)*/
3130                         if ((c1=(*i_getc)(f))!=EOF) {
3131                             if (c1==SP) {
3132                                 i_ungetc(SP,f);
3133                                 continue;
3134                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3135                                 i_ungetc(SP,f);
3136                                 continue;
3137                             } else {
3138                                 i_ungetc(c1,f);
3139                             }
                                 /* NOTE(review): LF is pushed back for every follower of CR
                                    that reaches this point, including when no LF was read
                                    from the stream -- confirm this is intended */
3140                             i_ungetc(LF,f);
3141                         } else {
3142                             i_ungetc(c1,f);
3143                         }
3144                         c1 = CR;
3145                         SEND;
3146                     }
3147                 }
                     /* neither condition holds: fall out of the chain to the dispatch below */
3148             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3149                 /* CP5022x */
3150                 c2 = c1;
3151                 NEXT;
3152             } else
3153                 SEND;
3154         }
3155         /* send: */
             /* dispatch the collected unit according to the current input_mode */
3156         switch(input_mode){
3157         case ASCII:
                 /* (*iconv) returns -1 / -2 to ask for one / two more input bytes;
                    they are read here and (*iconv) is called again with them in c0 */
3158             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3159             case -2:
3160                 /* 4 bytes UTF-8 */
3161                 if ((c0 = (*i_getc)(f)) != EOF) {
3162                     code_status(c0);
3163                     c0 <<= 8;
3164                     if ((c3 = (*i_getc)(f)) != EOF) {
3165                         code_status(c3);
3166                         (*iconv)(c2, c1, c0|c3);
3167                     }
3168                 }
3169                 break;
3170             case -1:
3171                 /* 3 bytes EUC or UTF-8 */
3172                 if ((c0 = (*i_getc)(f)) != EOF) {
3173                     code_status(c0);
3174                     (*iconv)(c2, c1, c0);
3175                 }
3176                 break;
3177             }
3178             break;
3179         case JIS_X_0208:
3180         case JIS_X_0213_1:
3181             if (ms_ucs_map_f &&
3182                 0x7F <= c2 && c2 <= 0x92 &&
3183                 0x21 <= c1 && c1 <= 0x7E) {
3184                 /* CP932 UDC */
                     /* NOTE(review): the test below cannot be true here, the enclosing
                        condition already requires c1 <= 0x7E */
3185                 if(c1 == 0x7F) return 0;
3186                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3187                 c2 = 0;
3188             }
3189             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3190             break;
3191 #ifdef X0212_ENABLE
3192         case JIS_X_0212:
3193             (*oconv)(PREFIX_EUCG3 | c2, c1);
3194             break;
3195 #endif /* X0212_ENABLE */
3196         case JIS_X_0213_2:
3197             (*oconv)(PREFIX_EUCG3 | c2, c1);
3198             break;
3199         default:
3200             (*oconv)(input_mode, c1);  /* other special case */
3201         }
3202
             /* the unit has been handed over: clear the pending bytes */
3203         c2 = 0;
3204         c0 = 0;
3205         continue;
3206         /* goto next_word */
3207     }
3208
3209     /* epilogue */
3210     (*iconv)(EOF, 0, 0);  /* tell the input converter that the stream has ended */
3211     if (!input_codename)
3212     {
3213         if (is_8bit) {
                 /* no code name recorded yet: report the input_code_list entry
                    with the lowest score */
3214             struct input_code *p = input_code_list;
3215             struct input_code *result = p;
3216             while (p->name){
3217                 if (p->score < result->score) result = p;
3218                 ++p;
3219             }
3220             set_input_codename(result->name);
3221 #ifdef CHECK_OPTION
3222             debug(result->name);
3223 #endif
3224         }
3225     }
3226     return 1;
3227 }
3228
3229 nkf_char
3230 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3231 {
3232     nkf_char ret, c3, c0;
3233     int hold_index;
3234
3235
3236     /** it must NOT be in the kanji shifte sequence      */
3237     /** it must NOT be written in JIS7                   */
3238     /** and it must be after 2 byte 8bit code            */
3239
3240     hold_count = 0;
3241     push_hold_buf(c2);
3242     push_hold_buf(c1);
3243
3244     while ((c1 = (*i_getc)(f)) != EOF) {
3245         if (c1 == ESC){
3246             (*i_ungetc)(c1,f);
3247             break;
3248         }
3249         code_status(c1);
3250         if (push_hold_buf(c1) == EOF || estab_f){
3251             break;
3252         }
3253     }
3254
3255     if (!estab_f){
3256         struct input_code *p = input_code_list;
3257         struct input_code *result = p;
3258         if (c1 == EOF){
3259             code_status(c1);
3260         }
3261         while (p->name){
3262             if (p->status_func && p->score < result->score){
3263                 result = p;
3264             }
3265             ++p;
3266         }
3267         set_iconv(TRUE, result->iconv_func);
3268     }
3269
3270
3271     /** now,
3272      ** 1) EOF is detected, or
3273      ** 2) Code is established, or
3274      ** 3) Buffer is FULL (but last word is pushed)
3275      **
3276      ** in 1) and 3) cases, we continue to use
3277      ** Kanji codes by oconv and leave estab_f unchanged.
3278      **/
3279
3280     ret = c1;
3281     hold_index = 0;
3282     while (hold_index < hold_count){
3283         c2 = hold_buf[hold_index++];
3284         if (c2 <= DEL
3285 #ifdef NUMCHAR_OPTION
3286             || is_unicode_capsule(c2)
3287 #endif
3288             ){
3289             (*iconv)(0, c2, 0);
3290             continue;
3291         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3292             (*iconv)(JIS_X_0201, c2, 0);
3293             continue;
3294         }
3295         if (hold_index < hold_count){
3296             c1 = hold_buf[hold_index++];
3297         }else{
3298             c1 = (*i_getc)(f);
3299             if (c1 == EOF){
3300                 c3 = EOF;
3301                 break;
3302             }
3303             code_status(c1);
3304         }
3305         c0 = 0;
3306         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3307         case -2:
3308             /* 4 bytes UTF-8 */
3309             if (hold_index < hold_count){
3310                 c0 = hold_buf[hold_index++];
3311             } else if ((c0 = (*i_getc)(f)) == EOF) {
3312                 ret = EOF;
3313                 break;
3314             } else {
3315                 code_status(c0);
3316                 c0 <<= 8;
3317                 if (hold_index < hold_count){
3318                     c3 = hold_buf[hold_index++];
3319                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3320                     c0 = ret = EOF;
3321                     break;
3322                 } else {
3323                     code_status(c3);
3324                     (*iconv)(c2, c1, c0|c3);
3325                 }
3326             }
3327             break;
3328         case -1:
3329             /* 3 bytes EUC or UTF-8 */
3330             if (hold_index < hold_count){
3331                 c0 = hold_buf[hold_index++];
3332             } else if ((c0 = (*i_getc)(f)) == EOF) {
3333                 ret = EOF;
3334                 break;
3335             } else {
3336                 code_status(c0);
3337             }
3338             (*iconv)(c2, c1, c0);
3339             break;
3340         }
3341         if (c0 == EOF) break;
3342     }
3343     return ret;
3344 }
3345
3346 nkf_char push_hold_buf(nkf_char c2)
3347 {
3348     if (hold_count >= HOLD_SIZE*2)
3349         return (EOF);
3350     hold_buf[hold_count++] = (unsigned char)c2;
3351     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3352 }
3353
3354 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3355 {
3356 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3357     nkf_char val;
3358 #endif
3359     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3360 #ifdef SHIFTJIS_CP932
3361     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3362         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3363         if (val){
3364             c2 = val >> 8;
3365             c1 = val & 0xff;
3366         }
3367     }
3368     if (cp932inv_f
3369         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3370         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3371         if (c){
3372             c2 = c >> 8;
3373             c1 = c & 0xff;
3374         }
3375     }
3376 #endif /* SHIFTJIS_CP932 */
3377 #ifdef X0212_ENABLE
3378     if (!x0213_f && is_ibmext_in_sjis(c2)){
3379         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3380         if (val){
3381             if (val > 0x7FFF){
3382                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3383                 c1 = val & 0xff;
3384             }else{
3385                 c2 = val >> 8;
3386                 c1 = val & 0xff;
3387             }
3388             if (p2) *p2 = c2;
3389             if (p1) *p1 = c1;
3390             return 0;
3391         }
3392     }
3393 #endif
3394     if(c2 >= 0x80){
3395         if(x0213_f && c2 >= 0xF0){
3396             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3397                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3398             }else{ /* 78<=k<=94 */
3399                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3400                 if (0x9E < c1) c2++;
3401             }
3402         }else{
3403             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3404             if (0x9E < c1) c2++;
3405         }
3406         if (c1 < 0x9F)
3407             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3408         else {
3409             c1 = c1 - 0x7E;
3410         }
3411     }
3412
3413 #ifdef X0212_ENABLE
3414     c2 = x0212_unshift(c2);
3415 #endif
3416     if (p2) *p2 = c2;
3417     if (p1) *p1 = c1;
3418     return 0;
3419 }
3420
3421 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3422 {
3423     if (c2 == JIS_X_0201) {
3424         c1 &= 0x7f;
3425     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3426         /* NOP */
3427     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3428         /* CP932 UDC */
3429         if(c1 == 0x7F) return 0;
3430         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3431         c2 = 0;
3432     } else {
3433         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3434         if (ret) return ret;
3435     }
3436     (*oconv)(c2, c1);
3437     return 0;
3438 }
3439
3440 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3441 {
         /*
          * Input converter for EUC: normalises (c2, c1, c0) and passes the
          * resulting pair to (*oconv).
          * Returns 0 after output, or -1 when c2 is 0x8f and no third byte
          * has been supplied yet (c0 == 0); the caller is then expected to
          * call again with c0 set.
          */
3442     if (c2 == JIS_X_0201) {
3443         c1 &= 0x7f;
3444 #ifdef X0212_ENABLE
3445     }else if (c2 == 0x8f){
             /* three-byte sequence introduced by 0x8f: (0x8f, c1, c0) */
3446         if (c0 == 0){
3447             return -1;
3448         }
3449         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3450             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3451             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3452             c2 = 0;
3453         } else {
                 /* fold the 0x8f prefix into the high byte of c2 and strip bit 7
                    from the two code bytes */
3454             c2 = (c2 << 8) | (c1 & 0x7f);
3455             c1 = c0 & 0x7f;
3456 #ifdef SHIFTJIS_CP932
3457             if (cp51932_f){
                     /* round trip through e2s_conv() / s2e_conv(); only applied
                        when e2s_conv() returns 0 */
3458                 nkf_char s2, s1;
3459                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3460                     s2e_conv(s2, s1, &c2, &c1);
3461                     if (c2 < 0x100){
3462                         c1 &= 0x7f;
3463                         c2 &= 0x7f;
3464                     }
3465                 }
3466             }
3467 #endif /* SHIFTJIS_CP932 */
3468         }
3469 #endif /* X0212_ENABLE */
3470     } else if (c2 == SSO){
             /* SSO prefix: output as JIS_X_0201 with bit 7 of c1 cleared */
3471         c2 = JIS_X_0201;
3472         c1 &= 0x7f;
3473     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3474         /* NOP */
3475     } else {
             /* two-byte code */
3476         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3477             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3478             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3479             c2 = 0;
3480         } else {
3481             c1 &= 0x7f;
3482             c2 &= 0x7f;
3483 #ifdef SHIFTJIS_CP932
3484             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
                     /* same round trip as above, limited to first bytes 0x79..0x7c */
3485                 nkf_char s2, s1;
3486                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3487                     s2e_conv(s2, s1, &c2, &c1);
3488                     if (c2 < 0x100){
3489                         c1 &= 0x7f;
3490                         c2 &= 0x7f;
3491                     }
3492                 }
3493             }
3494 #endif /* SHIFTJIS_CP932 */
3495         }
3496     }
3497     (*oconv)(c2, c1);
3498     return 0;
3499 }
3500
3501 #ifdef UTF8_INPUT_ENABLE
3502 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3503 {
3504     nkf_char ret = 0;
3505
3506     if (!c1){
3507         *p2 = 0;
3508         *p1 = c2;
3509     }else if (0xc0 <= c2 && c2 <= 0xef) {
3510         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3511 #ifdef NUMCHAR_OPTION
3512         if (ret > 0){
3513             if (p2) *p2 = 0;
3514             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3515             ret = 0;
3516         }
3517 #endif
3518     }
3519     return ret;
3520 }
3521
3522 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3523 {
3524     nkf_char ret = 0;
3525     static const char w_iconv_utf8_1st_byte[] =
3526     { /* 0xC0 - 0xFF */
3527         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3528         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3529         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3530         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3531
3532     if (c2 < 0 || 0xff < c2) {
3533     }else if (c2 == 0) { /* 0 : 1 byte*/
3534         c0 = 0;
3535     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3536         return 0;
3537     } else{
3538         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3539         case 21:
3540             if (c1 < 0x80 || 0xBF < c1) return 0;
3541             break;
3542         case 30:
3543             if (c0 == 0) return -1;
3544             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3545                 return 0;
3546             break;
3547         case 31:
3548         case 33:
3549             if (c0 == 0) return -1;
3550             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3551                 return 0;
3552             break;
3553         case 32:
3554             if (c0 == 0) return -1;
3555             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3556                 return 0;
3557             break;
3558         case 40:
3559             if (c0 == 0) return -2;
3560             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3561                 return 0;
3562             break;
3563         case 41:
3564             if (c0 == 0) return -2;
3565             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3566                 return 0;
3567             break;
3568         case 42:
3569             if (c0 == 0) return -2;
3570             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3571                 return 0;
3572             break;
3573         default:
3574             return 0;
3575             break;
3576         }
3577     }
3578     if (c2 == 0 || c2 == EOF){
3579     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3580         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3581         c2 = 0;
3582     } else {
3583         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3584     }
3585     if (ret == 0){
3586         (*oconv)(c2, c1);
3587     }
3588     return ret;
3589 }
3590 #endif
3591
3592 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3593 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3594 {
3595     val &= VALUE_MASK;
3596     if (val < 0x80){
3597         *p2 = val;
3598         *p1 = 0;
3599         *p0 = 0;
3600     }else if (val < 0x800){
3601         *p2 = 0xc0 | (val >> 6);
3602         *p1 = 0x80 | (val & 0x3f);
3603         *p0 = 0;
3604     } else if (val <= NKF_INT32_C(0xFFFF)) {
3605         *p2 = 0xe0 | (val >> 12);
3606         *p1 = 0x80 | ((val >> 6) & 0x3f);
3607         *p0 = 0x80 | (val        & 0x3f);
3608     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3609         *p2 = 0xe0 |  (val >> 16);
3610         *p1 = 0x80 | ((val >> 12) & 0x3f);
3611         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3612     } else {
3613         *p2 = 0;
3614         *p1 = 0;
3615         *p0 = 0;
3616     }
3617 }
3618 #endif
3619
3620 #ifdef UTF8_INPUT_ENABLE
3621 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3622 {
3623     nkf_char val;
3624     if (c2 >= 0xf8) {
3625         val = -1;
3626     } else if (c2 >= 0xf0){
3627         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3628         val = (c2 & 0x0f) << 18;
3629         val |= (c1 & 0x3f) << 12;
3630         val |= (c0 & 0x3f00) >> 2;
3631         val |= (c0 & 0x3f);
3632     }else if (c2 >= 0xe0){
3633         val = (c2 & 0x0f) << 12;
3634         val |= (c1 & 0x3f) << 6;
3635         val |= (c0 & 0x3f);
3636     }else if (c2 >= 0xc0){
3637         val = (c2 & 0x1f) << 6;
3638         val |= (c1 & 0x3f);
3639     }else{
3640         val = c2;
3641     }
3642     return val;
3643 }
3644
3645 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3646 {
3647     nkf_char c2, c1, c0;
3648     nkf_char ret = 0;
3649     val &= VALUE_MASK;
3650     if (val < 0x80){
3651         *p2 = 0;
3652         *p1 = val;
3653     }else{
3654         w16w_conv(val, &c2, &c1, &c0);
3655         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3656 #ifdef NUMCHAR_OPTION
3657         if (ret > 0){
3658             *p2 = 0;
3659             *p1 = CLASS_UNICODE | val;
3660             ret = 0;
3661         }
3662 #endif
3663     }
3664     return ret;
3665 }
3666 #endif
3667
3668 #ifdef UTF8_INPUT_ENABLE
3669 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3670 {
3671     nkf_char ret = 0;
3672     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3673         (*oconv)(c2, c1);
3674         return 0;
3675     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3676         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3677             return -2;
3678         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3679         c2 = 0;
3680     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3681         /*
3682            return 2;
3683         */
3684         return 1;
3685     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3686     if (ret) return ret;
3687     (*oconv)(c2, c1);
3688     return 0;
3689 }
3690
3691 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3692 {
3693     int ret = 0;
3694
3695     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3696     } else if (is_unicode_bmp(c1)) {
3697         ret = w16e_conv(c1, &c2, &c1);
3698     } else {
3699         c2 = 0;
3700         c1 =  CLASS_UNICODE | c1;
3701     }
3702     if (ret) return ret;
3703     (*oconv)(c2, c1);
3704     return 0;
3705 }
3706
3707 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3708 {
3709     const unsigned short *const *pp;
3710     const unsigned short *const *const *ppp;
3711     static const char no_best_fit_chars_table_C2[] =
3712     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3713         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3714         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3715         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3716     static const char no_best_fit_chars_table_C2_ms[] =
3717     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3718         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3719         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3720         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3721     static const char no_best_fit_chars_table_932_C2[] =
3722     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3723         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3724         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3725         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3726     static const char no_best_fit_chars_table_932_C3[] =
3727     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3728         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3729         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3730         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3731     nkf_char ret = 0;
3732
3733     if(c2 < 0x80){
3734         *p2 = 0;
3735         *p1 = c2;
3736     }else if(c2 < 0xe0){
3737         if(no_best_fit_chars_f){
3738             if(ms_ucs_map_f == UCS_MAP_CP932){
3739                 switch(c2){
3740                 case 0xC2:
3741                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3742                     break;
3743                 case 0xC3:
3744                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3745                     break;
3746                 }
3747             }else if(!cp932inv_f){
3748                 switch(c2){
3749                 case 0xC2:
3750                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3751                     break;
3752                 case 0xC3:
3753                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3754                     break;
3755                 }
3756             }else if(ms_ucs_map_f == UCS_MAP_MS){
3757                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3758             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3759                 switch(c2){
3760                 case 0xC2:
3761                     switch(c1){
3762                     case 0xA2:
3763                     case 0xA3:
3764                     case 0xA5:
3765                     case 0xA6:
3766                     case 0xAC:
3767                     case 0xAF:
3768                     case 0xB8:
3769                         return 1;
3770                     }
3771                     break;
3772                 }
3773             }
3774         }
3775         pp =
3776             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3777             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3778             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3779             utf8_to_euc_2bytes;
3780         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3781     }else if(c0 < 0xF0){
3782         if(no_best_fit_chars_f){
3783             if(ms_ucs_map_f == UCS_MAP_CP932){
3784                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3785             }else if(ms_ucs_map_f == UCS_MAP_MS){
3786                 switch(c2){
3787                 case 0xE2:
3788                     switch(c1){
3789                     case 0x80:
3790                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3791                         break;
3792                     case 0x88:
3793                         if(c0 == 0x92) return 1;
3794                         break;
3795                     }
3796                     break;
3797                 case 0xE3:
3798                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3799                     break;
3800                 }
3801             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3802                 switch(c2){
3803                 case 0xE3:
3804                     switch(c1){
3805                     case 0x82:
3806                             if(c0 == 0x94) return 1;
3807                         break;
3808                     case 0x83:
3809                             if(c0 == 0xBB) return 1;
3810                         break;
3811                     }
3812                     break;
3813                 }
3814             }else{
3815                 switch(c2){
3816                 case 0xE2:
3817                     switch(c1){
3818                     case 0x80:
3819                         if(c0 == 0x95) return 1;
3820                         break;
3821                     case 0x88:
3822                         if(c0 == 0xA5) return 1;
3823                         break;
3824                     }
3825                     break;
3826                 case 0xEF:
3827                     switch(c1){
3828                     case 0xBC:
3829                         if(c0 == 0x8D) return 1;
3830                         break;
3831                     case 0xBD:
3832                         if(c0 == 0x9E && !cp932inv_f) return 1;
3833                         break;
3834                     case 0xBF:
3835                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3836                         break;
3837                     }
3838                     break;
3839                 }
3840             }
3841         }
3842         ppp =
3843             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3844             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3845             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3846             utf8_to_euc_3bytes;
3847         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3848     }else return -1;
3849 #ifdef SHIFTJIS_CP932
3850     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3851         nkf_char s2, s1;
3852         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3853             s2e_conv(s2, s1, p2, p1);
3854         }else{
3855             ret = 1;
3856         }
3857     }
3858 #endif
3859     return ret;
3860 }
3861
3862 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3863 {
3864     nkf_char c2;
3865     const unsigned short *p;
3866     unsigned short val;
3867
3868     if (pp == 0) return 1;
3869
3870     c1 -= 0x80;
3871     if (c1 < 0 || psize <= c1) return 1;
3872     p = pp[c1];
3873     if (p == 0)  return 1;
3874
3875     c0 -= 0x80;
3876     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3877     val = p[c0];
3878     if (val == 0) return 1;
3879     if (no_cp932ext_f && (
3880         (val>>8) == 0x2D || /* NEC special characters */
3881         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3882         )) return 1;
3883
3884     c2 = val >> 8;
3885    if (val > 0x7FFF){
3886         c2 &= 0x7f;
3887         c2 |= PREFIX_EUCG3;
3888     }
3889     if (c2 == SO) c2 = JIS_X_0201;
3890     c1 = val & 0x7f;
3891     if (p2) *p2 = c2;
3892     if (p1) *p1 = c1;
3893     return 0;
3894 }
3895
3896 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3897 {
3898     int shift = 20;
3899     c &= VALUE_MASK;
3900     while(shift >= 0){
3901         if(c >= 1<<shift){
3902             while(shift >= 0){
3903                 (*f)(0, bin2hex(c>>shift));
3904                 shift -= 4;
3905             }
3906         }else{
3907             shift -= 4;
3908         }
3909     }
3910     return;
3911 }
3912
3913 void encode_fallback_html(nkf_char c)
3914 {
3915     (*oconv)(0, '&');
3916     (*oconv)(0, '#');
3917     c &= VALUE_MASK;
3918     if(c >= NKF_INT32_C(1000000))
3919         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3920     if(c >= NKF_INT32_C(100000))
3921         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3922     if(c >= 10000)
3923         (*oconv)(0, 0x30+(c/10000  )%10);
3924     if(c >= 1000)
3925         (*oconv)(0, 0x30+(c/1000   )%10);
3926     if(c >= 100)
3927         (*oconv)(0, 0x30+(c/100    )%10);
3928     if(c >= 10)
3929         (*oconv)(0, 0x30+(c/10     )%10);
3930     if(c >= 0)
3931         (*oconv)(0, 0x30+ c         %10);
3932     (*oconv)(0, ';');
3933     return;
3934 }
3935
3936 void encode_fallback_xml(nkf_char c)
3937 {
3938     (*oconv)(0, '&');
3939     (*oconv)(0, '#');
3940     (*oconv)(0, 'x');
3941     nkf_each_char_to_hex(oconv, c);
3942     (*oconv)(0, ';');
3943     return;
3944 }
3945
3946 void encode_fallback_java(nkf_char c)
3947 {
3948     (*oconv)(0, '\\');
3949     c &= VALUE_MASK;
3950     if(!is_unicode_bmp(c)){
3951         (*oconv)(0, 'U');
3952         (*oconv)(0, '0');
3953         (*oconv)(0, '0');
3954         (*oconv)(0, bin2hex(c>>20));
3955         (*oconv)(0, bin2hex(c>>16));
3956     }else{
3957         (*oconv)(0, 'u');
3958     }
3959     (*oconv)(0, bin2hex(c>>12));
3960     (*oconv)(0, bin2hex(c>> 8));
3961     (*oconv)(0, bin2hex(c>> 4));
3962     (*oconv)(0, bin2hex(c    ));
3963     return;
3964 }
3965
3966 void encode_fallback_perl(nkf_char c)
3967 {
3968     (*oconv)(0, '\\');
3969     (*oconv)(0, 'x');
3970     (*oconv)(0, '{');
3971     nkf_each_char_to_hex(oconv, c);
3972     (*oconv)(0, '}');
3973     return;
3974 }
3975
3976 void encode_fallback_subchar(nkf_char c)
3977 {
3978     c = unicode_subchar;
3979     (*oconv)((c>>8)&0xFF, c&0xFF);
3980     return;
3981 }
3982 #endif
3983
3984 #ifdef UTF8_OUTPUT_ENABLE
3985 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3986 {
3987     const unsigned short *p;
3988
3989     if (c2 == JIS_X_0201) {
3990         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3991             switch (c1) {
3992             case 0x20:
3993                 return 0xA0;
3994             case 0x7D:
3995                 return 0xA9;
3996             }
3997         }
3998         p = euc_to_utf8_1byte;
3999 #ifdef X0212_ENABLE
4000     } else if (is_eucg3(c2)){
4001         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4002             return 0xA6;
4003         }
4004         c2 = (c2&0x7f) - 0x21;
4005         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4006             p = x0212_to_utf8_2bytes[c2];
4007         else
4008             return 0;
4009 #endif
4010     } else {
4011         c2 &= 0x7f;
4012         c2 = (c2&0x7f) - 0x21;
4013         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4014             p =
4015                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4016                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4017                 euc_to_utf8_2bytes_ms[c2];
4018         else
4019             return 0;
4020     }
4021     if (!p) return 0;
4022     c1 = (c1 & 0x7f) - 0x21;
4023     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4024         return p[c1];
4025     return 0;
4026 }
4027
4028 void w_oconv(nkf_char c2, nkf_char c1)
4029 {
4030     nkf_char c0;
4031     nkf_char val;
4032
4033     if (output_bom_f) {
4034         output_bom_f = FALSE;
4035         (*o_putc)('\357');
4036         (*o_putc)('\273');
4037         (*o_putc)('\277');
4038     }
4039
4040     if (c2 == EOF) {
4041         (*o_putc)(EOF);
4042         return;
4043     }
4044
4045 #ifdef NUMCHAR_OPTION
4046     if (c2 == 0 && is_unicode_capsule(c1)){
4047         val = c1 & VALUE_MASK;
4048         if (val < 0x80){
4049             (*o_putc)(val);
4050         }else if (val < 0x800){
4051             (*o_putc)(0xC0 | (val >> 6));
4052             (*o_putc)(0x80 | (val & 0x3f));
4053         } else if (val <= NKF_INT32_C(0xFFFF)) {
4054             (*o_putc)(0xE0 | (val >> 12));
4055             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4056             (*o_putc)(0x80 | (val        & 0x3f));
4057         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4058             (*o_putc)(0xF0 | ( val>>18));
4059             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4060             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4061             (*o_putc)(0x80 | ( val      & 0x3f));
4062         }
4063         return;
4064     }
4065 #endif
4066
4067     if (c2 == 0) {
4068         output_mode = ASCII;
4069         (*o_putc)(c1);
4070     } else if (c2 == ISO_8859_1) {
4071         output_mode = UTF_8;
4072         (*o_putc)(c1 | 0x080);
4073     } else {
4074         output_mode = UTF_8;
4075         val = e2w_conv(c2, c1);
4076         if (val){
4077             w16w_conv(val, &c2, &c1, &c0);
4078             (*o_putc)(c2);
4079             if (c1){
4080                 (*o_putc)(c1);
4081                 if (c0) (*o_putc)(c0);
4082             }
4083         }
4084     }
4085 }
4086
4087 void w_oconv16(nkf_char c2, nkf_char c1)
4088 {
4089     if (output_bom_f) {
4090         output_bom_f = FALSE;
4091         if (output_endian == ENDIAN_LITTLE){
4092             (*o_putc)((unsigned char)'\377');
4093             (*o_putc)('\376');
4094         }else{
4095             (*o_putc)('\376');
4096             (*o_putc)((unsigned char)'\377');
4097         }
4098     }
4099
4100     if (c2 == EOF) {
4101         (*o_putc)(EOF);
4102         return;
4103     }
4104
4105     if (c2 == ISO_8859_1) {
4106         c2 = 0;
4107         c1 |= 0x80;
4108 #ifdef NUMCHAR_OPTION
4109     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4110         if (is_unicode_bmp(c1)) {
4111             c2 = (c1 >> 8) & 0xff;
4112             c1 &= 0xff;
4113         } else {
4114             c1 &= VALUE_MASK;
4115             if (c1 <= UNICODE_MAX) {
4116                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4117                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4118                 if (output_endian == ENDIAN_LITTLE){
4119                     (*o_putc)(c2 & 0xff);
4120                     (*o_putc)((c2 >> 8) & 0xff);
4121                     (*o_putc)(c1 & 0xff);
4122                     (*o_putc)((c1 >> 8) & 0xff);
4123                 }else{
4124                     (*o_putc)((c2 >> 8) & 0xff);
4125                     (*o_putc)(c2 & 0xff);
4126                     (*o_putc)((c1 >> 8) & 0xff);
4127                     (*o_putc)(c1 & 0xff);
4128                 }
4129             }
4130             return;
4131         }
4132 #endif
4133     } else if (c2) {
4134         nkf_char val = e2w_conv(c2, c1);
4135         c2 = (val >> 8) & 0xff;
4136         c1 = val & 0xff;
4137         if (!val) return;
4138     }
4139     if (output_endian == ENDIAN_LITTLE){
4140         (*o_putc)(c1);
4141         (*o_putc)(c2);
4142     }else{
4143         (*o_putc)(c2);
4144         (*o_putc)(c1);
4145     }
4146 }
4147
4148 void w_oconv32(nkf_char c2, nkf_char c1)
4149 {
4150     if (output_bom_f) {
4151         output_bom_f = FALSE;
4152         if (output_endian == ENDIAN_LITTLE){
4153             (*o_putc)((unsigned char)'\377');
4154             (*o_putc)('\376');
4155             (*o_putc)('\000');
4156             (*o_putc)('\000');
4157         }else{
4158             (*o_putc)('\000');
4159             (*o_putc)('\000');
4160             (*o_putc)('\376');
4161             (*o_putc)((unsigned char)'\377');
4162         }
4163     }
4164
4165     if (c2 == EOF) {
4166         (*o_putc)(EOF);
4167         return;
4168     }
4169
4170     if (c2 == ISO_8859_1) {
4171         c1 |= 0x80;
4172 #ifdef NUMCHAR_OPTION
4173     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4174         c1 &= VALUE_MASK;
4175 #endif
4176     } else if (c2) {
4177         c1 = e2w_conv(c2, c1);
4178         if (!c1) return;
4179     }
4180     if (output_endian == ENDIAN_LITTLE){
4181         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4182         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4183         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4184         (*o_putc)('\000');
4185     }else{
4186         (*o_putc)('\000');
4187         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4188         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4189         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4190     }
4191 }
4192 #endif
4193
4194 void e_oconv(nkf_char c2, nkf_char c1)
4195 {
4196 #ifdef NUMCHAR_OPTION
4197     if (c2 == 0 && is_unicode_capsule(c1)){
4198         w16e_conv(c1, &c2, &c1);
4199         if (c2 == 0 && is_unicode_capsule(c1)){
4200             c2 = c1 & VALUE_MASK;
4201             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4202                 /* eucJP-ms UDC */
4203                 c1 &= 0xFFF;
4204                 c2 = c1 / 94;
4205                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4206                 c1 = 0x21 + c1 % 94;
4207                 if (is_eucg3(c2)){
4208                     (*o_putc)(0x8f);
4209                     (*o_putc)((c2 & 0x7f) | 0x080);
4210                     (*o_putc)(c1 | 0x080);
4211                 }else{
4212                     (*o_putc)((c2 & 0x7f) | 0x080);
4213                     (*o_putc)(c1 | 0x080);
4214                 }
4215                 return;
4216             } else {
4217                 if (encode_fallback) (*encode_fallback)(c1);
4218                 return;
4219             }
4220         }
4221     }
4222 #endif
4223     if (c2 == EOF) {
4224         (*o_putc)(EOF);
4225         return;
4226     } else if (c2 == 0) {
4227         output_mode = ASCII;
4228         (*o_putc)(c1);
4229     } else if (c2 == JIS_X_0201) {
4230         output_mode = EUC_JP;
4231         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4232     } else if (c2 == ISO_8859_1) {
4233         output_mode = ISO_8859_1;
4234         (*o_putc)(c1 | 0x080);
4235 #ifdef X0212_ENABLE
4236     } else if (is_eucg3(c2)){
4237         output_mode = EUC_JP;
4238 #ifdef SHIFTJIS_CP932
4239         if (!cp932inv_f){
4240             nkf_char s2, s1;
4241             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4242                 s2e_conv(s2, s1, &c2, &c1);
4243             }
4244         }
4245 #endif
4246         if (c2 == 0) {
4247             output_mode = ASCII;
4248             (*o_putc)(c1);
4249         }else if (is_eucg3(c2)){
4250             if (x0212_f){
4251                 (*o_putc)(0x8f);
4252                 (*o_putc)((c2 & 0x7f) | 0x080);
4253                 (*o_putc)(c1 | 0x080);
4254             }
4255         }else{
4256             (*o_putc)((c2 & 0x7f) | 0x080);
4257             (*o_putc)(c1 | 0x080);
4258         }
4259 #endif
4260     } else {
4261         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4262             set_iconv(FALSE, 0);
4263             return; /* too late to rescue this char */
4264         }
4265         output_mode = EUC_JP;
4266         (*o_putc)(c2 | 0x080);
4267         (*o_putc)(c1 | 0x080);
4268     }
4269 }
4270
4271 #ifdef X0212_ENABLE
4272 nkf_char x0212_shift(nkf_char c)
4273 {
4274     nkf_char ret = c;
4275     c &= 0x7f;
4276     if (is_eucg3(ret)){
4277         if (0x75 <= c && c <= 0x7f){
4278             ret = c + (0x109 - 0x75);
4279         }
4280     }else{
4281         if (0x75 <= c && c <= 0x7f){
4282             ret = c + (0x113 - 0x75);
4283         }
4284     }
4285     return ret;
4286 }
4287
4288
4289 nkf_char x0212_unshift(nkf_char c)
4290 {
4291     nkf_char ret = c;
4292     if (0x7f <= c && c <= 0x88){
4293         ret = c + (0x75 - 0x7f);
4294     }else if (0x89 <= c && c <= 0x92){