OSDN Git Service

* define HELP_OUTPUT and help and version messages are now output to stdout.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.158 2007/12/23 07:25:47 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-22"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
/* When the build selects none of the DEFAULT_CODE_* macros, default to JIS. */
#if !defined(DEFAULT_CODE_JIS) && !defined(DEFAULT_CODE_SJIS) && \
    !defined(DEFAULT_CODE_EUC) && !defined(DEFAULT_CODE_UTF8)
#define DEFAULT_CODE_JIS 1
#endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
/*
 * Newline emission helpers, selected at build time by DEFAULT_NEWLINE:
 *   0x0D0A -> CR LF, 0x0D -> CR, any other value (or undefined) -> LF.
 *
 * PUT_NEWLINE(func) calls func once for each byte of the newline.
 * OCONV_NEWLINE(func) does the same through a two-argument function,
 * passing 0 as the first argument.
 */
#if DEFAULT_NEWLINE == 0x0D0A
#define PUT_NEWLINE(func) do {\
    func(0x0D);\
    func(0x0A);\
} while (0)
#define OCONV_NEWLINE(func) do {\
    func(0, 0x0D);\
    func(0, 0x0A);\
} while (0)
#elif DEFAULT_NEWLINE == 0x0D
#define PUT_NEWLINE(func) func(0x0D)
#define OCONV_NEWLINE(func) func(0, 0x0D)
#else
/*
 * Normalize to LF.  Drop any caller-supplied definition first so that a
 * differently spelled value (e.g. -DDEFAULT_NEWLINE=10) does not become an
 * incompatible macro redefinition.
 */
#undef DEFAULT_NEWLINE
#define DEFAULT_NEWLINE 0x0A
#define PUT_NEWLINE(func) func(0x0A)
#define OCONV_NEWLINE(func) func(0, 0x0A)
#endif
/* Stream for help and version text: stdout unless HELP_OUTPUT_STDERR is defined. */
#if !defined(HELP_OUTPUT_STDERR)
#define HELP_OUTPUT stdout
#else
#define HELP_OUTPUT stderr
#endif
80
81 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
82 #define MSDOS
83 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
84 #define __WIN32__
85 #endif
86 #endif
87
88 #ifdef PERL_XS
89 #undef OVERWRITE
90 #endif
91
92 #ifndef PERL_XS
93 #include <stdio.h>
94 #endif
95
96 #include <stdlib.h>
97 #include <string.h>
98
99 #if defined(MSDOS) || defined(__OS2__)
100 #include <fcntl.h>
101 #include <io.h>
102 #if defined(_MSC_VER) || defined(__WATCOMC__)
103 #define mktemp _mktemp
104 #endif
105 #endif
106
107 #ifdef MSDOS
108 #ifdef LSI_C
109 #define setbinmode(fp) fsetbin(fp)
110 #elif defined(__DJGPP__)
111 #include <libc/dosio.h>
112 #define setbinmode(fp) djgpp_setbinmode(fp)
113 #else /* Microsoft C, Turbo C */
114 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
115 #endif
116 #else /* UNIX */
117 #define setbinmode(fp)
118 #endif
119
120 #if defined(__DJGPP__)
121 void  djgpp_setbinmode(FILE *fp)
122 {
123     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
124     int fd, m;
125     fd = fileno(fp);
126     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
127     __file_handle_set(fd, m);
128 }
129 #endif
130
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
133 #else /* BSD */
134 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
135 #endif
136
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
139 #define         EASYWIN
140 #ifndef __WIN16__
141 #define __WIN16__
142 #endif
143 #include <windows.h>
144 #endif
145
146 #ifdef OVERWRITE
147 /* added by satoru@isoternet.org */
148 #if defined(__EMX__)
149 #include <sys/types.h>
150 #endif
151 #include <sys/stat.h>
152 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
153 #include <unistd.h>
154 #if defined(__WATCOMC__)
155 #include <sys/utime.h>
156 #else
157 #include <utime.h>
158 #endif
159 #else /* defined(MSDOS) */
160 #ifdef __WIN32__
161 #ifdef __BORLANDC__ /* BCC32 */
162 #include <utime.h>
163 #else /* !defined(__BORLANDC__) */
164 #include <sys/utime.h>
165 #endif /* (__BORLANDC__) */
166 #else /* !defined(__WIN32__) */
167 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
168 #include <sys/utime.h>
169 #elif defined(__TURBOC__) /* BCC */
170 #include <utime.h>
171 #elif defined(LSI_C) /* LSI C */
172 #endif /* (__WIN32__) */
173 #endif
174 #endif
175 #endif
176
177 #define         FALSE   0
178 #define         TRUE    1
179
180 /* state of output_mode and input_mode
181
182    c2           0 means ASCII
183                 JIS_X_0201
184                 ISO_8859_1
185                 JIS_X_0208
186                 EOF      all termination
187    c1           32bit data
188
189  */
190
191 /* Input Assumption */
192
193 #define         JIS_INPUT       4
194 #define         EUC_INPUT      16
195 #define         SJIS_INPUT      5
196 #define         LATIN1_INPUT    6
197 #define         UTF8_INPUT     13
198 #define         UTF16_INPUT    1015
199 #define         UTF32_INPUT    1017
200
201 #define         FIXED_MIME      7
202 #define         STRICT_MIME     8
203
204 /* MIME ENCODE */
205
206
207 /* byte order */
208
209 #define         ENDIAN_BIG      1234
210 #define         ENDIAN_LITTLE   4321
211 #define         ENDIAN_2143     2143
212 #define         ENDIAN_3412     3412
213
214 /* ASCII CODE */
215
216 #define         BS      0x08
217 #define         TAB     0x09
218 #define         LF      0x0a
219 #define         CR      0x0d
220 #define         ESC     0x1b
221 #define         SP      0x20
222 #define         AT      0x40
223 #define         SSP     0xa0
224 #define         DEL     0x7f
225 #define         SI      0x0f
226 #define         SO      0x0e
227 #define         SSO     0x8e
228 #define         SS3     0x8f
229 #define         CRLF    0x0D0A
230
231
232 /* encodings */
233
/*
 * Encoding identifiers.
 * The first group is numbered consecutively from 0; the second group is
 * kept apart from it and starts at 0x1000.
 */
enum nkf_encodings {
    /* ASCII / Latin-1 */
    ASCII = 0,
    ISO_8859_1,

    /* ISO-2022-JP and its variants */
    ISO_2022_JP,
    CP50220,
    CP50221,
    CP50222,
    ISO_2022_JP_1,
    ISO_2022_JP_3,

    /* Shift_JIS variants */
    SHIFT_JIS,
    WINDOWS_31J,
    CP10001,

    /* EUC-JP variants */
    EUC_JP,
    CP51932,
    EUCJP_MS,
    EUCJP_ASCII,

    /* JIS X 0213 variants of Shift_JIS and EUC */
    SHIFT_JISX0213,
    SHIFT_JIS_2004,
    EUC_JISX0213,
    EUC_JIS_2004,

    /* UTF-8 */
    UTF_8,
    UTF_8N,
    UTF_8_BOM,
    UTF8_MAC,

    /* UTF-16 */
    UTF_16,
    UTF_16BE,
    UTF_16BE_BOM,
    UTF_16LE,
    UTF_16LE_BOM,

    /* UTF-32 */
    UTF_32,
    UTF_32BE,
    UTF_32BE_BOM,
    UTF_32LE,
    UTF_32LE_BOM,

    /* Second group, numbered from 0x1000 */
    JIS_X_0201 = 0x1000,
    JIS_X_0208,
    JIS_X_0212,
    JIS_X_0213_1,
    JIS_X_0213_2,
    BINARY
};
275
276 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
277 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
278 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
279 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
280 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
281 void j_oconv(nkf_char c2, nkf_char c1);
282 void s_oconv(nkf_char c2, nkf_char c1);
283 void e_oconv(nkf_char c2, nkf_char c1);
284 void w_oconv(nkf_char c2, nkf_char c1);
285 void w_oconv16(nkf_char c2, nkf_char c1);
286 void w_oconv32(nkf_char c2, nkf_char c1);
287
288 typedef struct {
289     char *name;
290     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
291     void (*oconv_func)(nkf_char c2, nkf_char c1);
292 } nkf_native_encoding;
293
294 nkf_native_encoding NkfEncodingASCII =          { "US_ASCII", e_iconv, e_oconv };
295 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
296 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
297 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
298 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
299 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
300 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
301
302 typedef struct {
303     int id;
304     char *name;
305     nkf_native_encoding *based_encoding;
306 } nkf_encoding;
307 nkf_encoding nkf_encoding_table[] = {
308     {ASCII,             "ASCII",                &NkfEncodingASCII},
309     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
310     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingASCII},
311     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
312     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
313     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
314     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
315     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
316     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
317     {WINDOWS_31J,       "WINDOWS-31J",          &NkfEncodingShift_JIS},
318     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
319     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
320     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
321     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
322     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
323     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
324     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
325     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
326     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
327     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
328     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
329     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
330     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
331     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
332     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
333     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
334     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
335     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
336     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
337     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
338     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
339     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
340     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
341     {BINARY,            "BINARY",               &NkfEncodingASCII},
342     {-1,                NULL,                   NULL}
343 };
344 #define NKF_ENCODING_TABLE_SIZE 34
345 struct {
346     const char *name;
347     const int id;
348 } encoding_name_to_id_table[] = {
349     {"ASCII",                   ASCII},
350     {"ISO-2022-JP",             ISO_2022_JP},
351     {"X-ISO2022JP-CP932",       CP50220},
352     {"CP50220",                 CP50220},
353     {"CP50221",                 CP50221},
354     {"CP50222",                 CP50222},
355     {"ISO-2022-JP-1",           ISO_2022_JP_1},
356     {"ISO-2022-JP-3",           ISO_2022_JP_3},
357     {"SHIFT_JIS",               SHIFT_JIS},
358     {"SJIS",                    SHIFT_JIS},
359     {"WINDOWS-31J",             WINDOWS_31J},
360     {"CSWINDOWS31J",            WINDOWS_31J},
361     {"CP932",                   WINDOWS_31J},
362     {"MS932",                   WINDOWS_31J},
363     {"CP10001",                 CP10001},
364     {"EUCJP",                   EUC_JP},
365     {"EUC-JP",                  EUC_JP},
366     {"CP51932",                 CP51932},
367     {"EUC-JP-MS",               EUCJP_MS},
368     {"EUCJP-MS",                EUCJP_MS},
369     {"EUCJPMS",                 EUCJP_MS},
370     {"EUC-JP-ASCII",            EUCJP_ASCII},
371     {"EUCJP-ASCII",             EUCJP_ASCII},
372     {"SHIFT_JISX0213",          SHIFT_JISX0213},
373     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
374     {"EUC-JISX0213",            EUC_JISX0213},
375     {"EUC-JIS-2004",            EUC_JIS_2004},
376     {"UTF-8",                   UTF_8},
377     {"UTF-8N",                  UTF_8N},
378     {"UTF-8-BOM",               UTF_8_BOM},
379     {"UTF8-MAC",                UTF8_MAC},
380     {"UTF-8-MAC",               UTF8_MAC},
381     {"UTF-16",                  UTF_16},
382     {"UTF-16BE",                UTF_16BE},
383     {"UTF-16BE-BOM",            UTF_16BE_BOM},
384     {"UTF-16LE",                UTF_16LE},
385     {"UTF-16LE-BOM",            UTF_16LE_BOM},
386     {"UTF-32",                  UTF_32},
387     {"UTF-32BE",                UTF_32BE},
388     {"UTF-32BE-BOM",            UTF_32BE_BOM},
389     {"UTF-32LE",                UTF_32LE},
390     {"UTF-32LE-BOM",            UTF_32LE_BOM},
391     {"BINARY",                  BINARY},
392     {NULL,                      -1}
393 };
394 #if defined(DEFAULT_CODE_JIS)
395 #define     DEFAULT_ENCODING ISO_2022_JP
396 #elif defined(DEFAULT_CODE_SJIS)
397 #define     DEFAULT_ENCODING SHIFT_JIS
398 #elif defined(DEFAULT_CODE_EUC)
399 #define     DEFAULT_ENCODING EUC_JP
400 #elif defined(DEFAULT_CODE_UTF8)
401 #define     DEFAULT_ENCODING UTF_8
402 #endif
403
404
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every parameter is parenthesized so that expression arguments
 * (e.g. bin2hex(a | b)) expand with the intended precedence.  The macros
 * may still evaluate their argument more than once, so do not pass
 * expressions with side effects.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit; anything that is not a hex digit yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and the characters strictly between SP and DEL other than ? = _ ( ) . and the double quote. */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END inclusive. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
433
434 #define         HOLD_SIZE       1024
435 #if defined(INT_IS_SHORT)
436 #define         IOBUF_SIZE      2048
437 #else
438 #define         IOBUF_SIZE      16384
439 #endif
440
441 #define         DEFAULT_J       'B'
442 #define         DEFAULT_R       'B'
443
444 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
445 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
446
447 #define         RANGE_NUM_MAX   18
448 #define         GETA1   0x22
449 #define         GETA2   0x2e
450
451
452 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
453 #define sizeof_euc_to_utf8_1byte 94
454 #define sizeof_euc_to_utf8_2bytes 94
455 #define sizeof_utf8_to_euc_C2 64
456 #define sizeof_utf8_to_euc_E5B8 64
457 #define sizeof_utf8_to_euc_2bytes 112
458 #define sizeof_utf8_to_euc_3bytes 16
459 #endif
460
461 /* MIME preprocessor */
462
463 #ifdef EASYWIN /*Easy Win */
464 extern POINT _BufferSize;
465 #endif
466
467 struct input_code{
468     char *name;
469     nkf_char stat;
470     nkf_char score;
471     nkf_char index;
472     nkf_char buf[3];
473     void (*status_func)(struct input_code *, nkf_char);
474     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
475     int _file_stat;
476 };
477
478 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
479 static nkf_encoding *output_encoding;
480
481 #if !defined(PERL_XS) && !defined(WIN32DLL)
482 static  nkf_char     noconvert(FILE *f);
483 #endif
484 static  void    module_connection(void);
485 static  nkf_char     kanji_convert(FILE *f);
486 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
487 static  nkf_char     push_hold_buf(nkf_char c2);
488 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
489 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
490 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
491 /* UCS Mapping
492  * 0: Shift_JIS, eucJP-ascii
493  * 1: eucJP-ms
494  * 2: CP932, CP51932
495  * 3: CP10001
496  */
497 #define UCS_MAP_ASCII   0
498 #define UCS_MAP_MS      1
499 #define UCS_MAP_CP932   2
500 #define UCS_MAP_CP10001 3
501 static int ms_ucs_map_f = UCS_MAP_ASCII;
502 #endif
503 #ifdef UTF8_INPUT_ENABLE
504 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
505 static  int     no_cp932ext_f = FALSE;
506 /* ignore ZERO WIDTH NO-BREAK SPACE */
507 static  int     no_best_fit_chars_f = FALSE;
508 static  int     input_endian = ENDIAN_BIG;
509 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
510 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
511 static  void    encode_fallback_html(nkf_char c);
512 static  void    encode_fallback_xml(nkf_char c);
513 static  void    encode_fallback_java(nkf_char c);
514 static  void    encode_fallback_perl(nkf_char c);
515 static  void    encode_fallback_subchar(nkf_char c);
516 static  void    (*encode_fallback)(nkf_char c) = NULL;
517 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
518 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
519 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
520 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
521 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
522 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
523 static  void    w_status(struct input_code *, nkf_char);
524 #endif
525 #ifdef UTF8_OUTPUT_ENABLE
526 static  int     output_bom_f = FALSE;
527 static  int     output_endian = ENDIAN_BIG;
528 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
529 #endif
530 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
531 static  void    fold_conv(nkf_char c2,nkf_char c1);
532 static  void    nl_conv(nkf_char c2,nkf_char c1);
533 static  void    z_conv(nkf_char c2,nkf_char c1);
534 static  void    rot_conv(nkf_char c2,nkf_char c1);
535 static  void    hira_conv(nkf_char c2,nkf_char c1);
536 static  void    base64_conv(nkf_char c2,nkf_char c1);
537 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
538 static  void    no_connection(nkf_char c2,nkf_char c1);
539 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
540
541 static  void    code_score(struct input_code *ptr);
542 static  void    code_status(nkf_char c);
543
544 static  void    std_putc(nkf_char c);
545 static  nkf_char     std_getc(FILE *f);
546 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
547
548 static  nkf_char     broken_getc(FILE *f);
549 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
550
551 static  nkf_char     mime_begin(FILE *f);
552 static  nkf_char     mime_getc(FILE *f);
553 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
554
555 static  void    switch_mime_getc(void);
556 static  void    unswitch_mime_getc(void);
557 static  nkf_char     mime_begin_strict(FILE *f);
558 static  nkf_char     mime_getc_buf(FILE *f);
559 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
560 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
561
562 static  nkf_char     base64decode(nkf_char c);
563 static  void    mime_prechar(nkf_char c2, nkf_char c1);
564 static  void    mime_putc(nkf_char c);
565 static  void    open_mime(nkf_char c);
566 static  void    close_mime(void);
567 static  void    eof_mime(void);
568 static  void    mimeout_addchar(nkf_char c);
569 #ifndef PERL_XS
570 static  void    usage(void);
571 static  void    version(void);
572 static  void    show_configuration(void);
573 #endif
574 static  void    options(unsigned char *c);
575 static  void    reinit(void);
576
577 /* buffers */
578
579 #if !defined(PERL_XS) && !defined(WIN32DLL)
580 static unsigned char   stdibuf[IOBUF_SIZE];
581 static unsigned char   stdobuf[IOBUF_SIZE];
582 #endif
583 static unsigned char   hold_buf[HOLD_SIZE*2];
584 static int             hold_count = 0;
585
586 /* MIME preprocessor fifo */
587
588 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
589 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
590 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
591 static unsigned char           mime_buf[MIME_BUF_SIZE];
592 static unsigned int            mime_top = 0;
593 static unsigned int            mime_last = 0;  /* decoded */
594 static unsigned int            mime_input = 0; /* undecoded */
595 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
596
597 /* flags */
598 static int             unbuf_f = FALSE;
599 static int             estab_f = FALSE;
600 static int             nop_f = FALSE;
601 static int             binmode_f = TRUE;       /* binary mode */
602 static int             rot_f = FALSE;          /* rot13/47 mode */
603 static int             hira_f = FALSE;          /* hira/kata henkan */
604 static int             input_f = FALSE;        /* non fixed input code  */
605 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
606 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
607 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
608 static int             mimebuf_f = FALSE;      /* MIME buffered input */
609 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
610 static int             iso8859_f = FALSE;      /* ISO8859 through */
611 static int             mimeout_f = FALSE;       /* base64 mode */
612 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
613 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
614
615 #ifdef UNICODE_NORMALIZATION
616 static int nfc_f = FALSE;
617 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
618 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
619 static nkf_char nfc_getc(FILE *f);
620 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
621 #endif
622
623 #ifdef INPUT_OPTION
624 static int cap_f = FALSE;
625 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
626 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
627 static nkf_char cap_getc(FILE *f);
628 static nkf_char cap_ungetc(nkf_char c,FILE *f);
629
630 static int url_f = FALSE;
631 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
632 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
633 static nkf_char url_getc(FILE *f);
634 static nkf_char url_ungetc(nkf_char c,FILE *f);
635 #endif
636
/*
 * 32-bit constants and helpers for values whose top byte is a class tag.
 * NKF_INT32_C appends an L suffix when INT_IS_SHORT is defined, so the
 * constants are long there.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* True when the class byte of c is CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the value part of c (low 24 bits) is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
649
650 #ifdef NUMCHAR_OPTION
651 static int numchar_f = FALSE;
652 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
653 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
654 static nkf_char numchar_getc(FILE *f);
655 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
656 #endif
657
658 #ifdef CHECK_OPTION
659 static int noout_f = FALSE;
660 static void no_putc(nkf_char c);
661 static int debug_f = FALSE;
662 static void debug(const char *str);
663 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
664 #endif
665
666 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
667 #if !defined PERL_XS
668 static  void    print_guessed_code(char *filename);
669 #endif
670 static  void    set_input_codename(char *codename);
671
672 #ifdef EXEC_IO
673 static int exec_f = 0;
674 #endif
675
676 #ifdef SHIFTJIS_CP932
677 /* invert IBM extended characters to others */
678 static int cp51932_f = FALSE;
679
680 /* invert NEC-selected IBM extended characters to IBM extended characters */
681 static int cp932inv_f = TRUE;
682
683 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
684 #endif /* SHIFTJIS_CP932 */
685
686 #ifdef X0212_ENABLE
687 static int x0212_f = FALSE;
688 static nkf_char x0212_shift(nkf_char c);
689 static nkf_char x0212_unshift(nkf_char c);
690 #endif
691 static int x0213_f = FALSE;
692
693 static unsigned char prefix_table[256];
694
695 static void set_code_score(struct input_code *ptr, nkf_char score);
696 static void clr_code_score(struct input_code *ptr, nkf_char score);
697 static void status_disable(struct input_code *ptr);
698 static void status_push_ch(struct input_code *ptr, nkf_char c);
699 static void status_clear(struct input_code *ptr);
700 static void status_reset(struct input_code *ptr);
701 static void status_reinit(struct input_code *ptr);
702 static void status_check(struct input_code *ptr, nkf_char c);
703 static void e_status(struct input_code *, nkf_char);
704 static void s_status(struct input_code *, nkf_char);
705
706 struct input_code input_code_list[] = {
707     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
708     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
709 #ifdef UTF8_INPUT_ENABLE
710     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
711     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
712     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
713 #endif
714     {0}
715 };
716
717 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
718 static int              base64_count = 0;
719
720 /* X0208 -> ASCII converter */
721
722 /* fold parameter */
723 static int             f_line = 0;    /* chars in line */
724 static int             f_prev = 0;
725 static int             fold_preserve_f = FALSE; /* preserve new lines */
726 static int             fold_f  = FALSE;
727 static int             fold_len  = 0;
728
729 /* options */
730 static unsigned char   kanji_intro = DEFAULT_J;
731 static unsigned char   ascii_intro = DEFAULT_R;
732
733 /* Folding */
734
735 #define FOLD_MARGIN  10
736 #define DEFAULT_FOLD 60
737
738 static int             fold_margin  = FOLD_MARGIN;
739
740 /* converters */
741
/*
 * Output converter for the build-time default code.
 * A single #if/#elif chain (same priority order as DEFAULT_ENCODING:
 * JIS, SJIS, EUC, UTF8) guarantees DEFAULT_CONV is defined at most once
 * even when several DEFAULT_CODE_* macros are set.
 */
#if defined(DEFAULT_CODE_JIS)
#   define  DEFAULT_CONV j_oconv
#elif defined(DEFAULT_CODE_SJIS)
#   define  DEFAULT_CONV s_oconv
#elif defined(DEFAULT_CODE_EUC)
#   define  DEFAULT_CONV e_oconv
#elif defined(DEFAULT_CODE_UTF8)
#   define  DEFAULT_CONV w_oconv
#endif
754
755 /* process default */
756 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
757
758 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
759 /* s_iconv or oconv */
760 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
761
762 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
763 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
764 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
765 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
766 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
767 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
768 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
769
770 /* static redirections */
771
772 static  void   (*o_putc)(nkf_char c) = std_putc;
773
774 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
775 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
776
777 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
778 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
779
780 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
781
782 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
783 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
784
785 /* for strict mime */
786 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
787 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
788
789 /* Global states */
790 static int output_mode = ASCII,    /* output kanji mode */
791            input_mode =  ASCII,    /* input kanji mode */
792            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
793 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
794
795 /* X0201 / X0208 conversion tables */
796
/* X0201 kana conversion table */
/* 90-9F A0-DF */
/* Layout: two bytes per entry, 64 entries, then a terminating 0x00,0x00 pair.
 * NOTE(review): the byte pairs look like two-byte X0208 codes (0x21.. symbols,
 * 0x25.. katakana) and the index is presumably the X0201 kana code relative
 * to the start of the range above -- confirm against the lookup code, which
 * is not in this part of the file. */
static const unsigned char cv[]= {
    0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
    0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
    0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
    0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
    0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
    0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
    0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
    0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
    0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
    0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
    0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
    0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
    0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
    0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
    0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
    0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
    0x00,0x00};
817
818
/* X0201 kana conversion table for dakuten (voiced sound mark) */
/* 90-9F A0-DF */
/* Same layout as cv: two bytes per entry, 64 entries plus a 0x00,0x00 pair.
 * Most entries are 0x00,0x00.
 * NOTE(review): a zero pair presumably means the kana has no voiced form --
 * confirm against the lookup code. */
static const unsigned char dv[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
    0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
    0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
    0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
    0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
    0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
    0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
839
/* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
/* 90-9F A0-DF */
/* Same layout as cv: two bytes per entry, 64 entries plus a 0x00,0x00 pair.
 * Only five entries are non-zero.
 * NOTE(review): a zero pair presumably means the kana has no semi-voiced
 * form -- confirm against the lookup code. */
static const unsigned char ev[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
    0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
860
861
/* X0208 kigou (symbol) conversion table */
/* 0x8140 - 0x819e */
/* One byte per entry, 96 entries.  The non-zero values are printable ASCII
 * codes (0x2c ',', 0x2e '.', 0x3a ':' ...).
 * NOTE(review): 0x00 presumably marks a symbol with no single-byte
 * equivalent; confirm the indexing and the zero convention against the
 * lookup code, which is not in this part of the file. */
static const unsigned char fv[] = {

    0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
    0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
    0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
    0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
    0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
    0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
    0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
} ;
879
880
881
/* TRUE: main() reopens stdout onto a file instead of writing to the original
 * stdout. */
static int             file_out_f = FALSE;
#ifdef OVERWRITE
/* In-place conversion settings, consumed by main(). */
static int             overwrite_f = FALSE;     /* write to a temp file, then rename it over the input */
static int             preserve_time_f = FALSE; /* copy the input's timestamps onto the result */
static int             backup_f = FALSE;        /* rename the input to a backup name first */
static char            *backup_suffix = "";     /* pattern handed to get_backup_filename() */
static char *get_backup_filename(const char *suffix, const char *filename);
#endif

static int nlmode_f = 0;   /* CR, LF, CRLF */
static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
static nkf_char prev_cr = 0; /* CR or 0 */
#ifdef EASYWIN /*Easy Win */
/* Target of the scanf() call main() makes before returning. */
static int             end_check;
#endif /*Easy Win */

/* NOTE(review): presumably the push-back buffer and its index used by
 * std_getc/std_ungetc -- their definitions are not visible here; confirm. */
#define STD_GC_BUFSIZE (256)
nkf_char std_gc_buf[STD_GC_BUFSIZE];
nkf_char std_gc_ndx;
901
/*
 * Return a freshly malloc()ed duplicate of str.
 *
 * When the allocation fails, the error is reported through perror() (with
 * str as the prefix) and a pointer to an empty string literal is returned
 * instead of NULL, so the result must not be assumed to be heap memory in
 * that case.
 */
char* nkf_strcpy(const char *str)
{
    size_t bytes = strlen(str) + 1;     /* include the terminating NUL */
    char *copy = malloc(bytes);

    if (copy == NULL) {
        perror(str);
        return "";
    }
    memcpy(copy, str, bytes);
    return copy;
}
912
/*
 * Copy str into res, converting each character with nkf_toupper().
 *
 * length is the size of the res buffer in bytes.  At most length - 1
 * characters are copied so that the terminating NUL always fits; longer
 * input is truncated.  (The previous version wrote the terminator at
 * res[length] whenever str had length or more characters, one byte past the
 * end of the buffer.)  Nothing is written when length is 0.
 */
static void nkf_str_upcase(const char *str, char *res, size_t length)
{
    size_t i = 0;

    if (length == 0) {
        return;
    }
    /* i + 1 < length keeps one slot free for the terminator. */
    for (; i + 1 < length && str[i]; i++) {
        res[i] = nkf_toupper(str[i]);
    }
    res[i] = 0;
}
921
922 static nkf_encoding *nkf_enc_from_index(int idx)
923 {
924     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
925         return 0;
926     }
927     return &nkf_encoding_table[idx];
928 }
929
930 static int nkf_enc_find_index(const char *name)
931 {
932     int i, index = -1;
933     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
934         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
935             return encoding_name_to_id_table[i].id;
936         }
937     }
938     return index;
939 }
940
941 static nkf_encoding *nkf_enc_find(const char *name)
942 {
943     int idx = -1;
944     idx = nkf_enc_find_index(name);
945     if (idx < 0) return 0;
946     return nkf_enc_from_index(idx);
947 }
948
/*
 * Field accessors for an nkf_encoding pointer.
 *
 * nkf_enc_to_index() yields -1 for a null pointer instead of dereferencing
 * it.  nkf_enc_find() returns a null pointer for unknown names and its
 * result is fed straight into a switch on nkf_enc_to_index(), so -1 (the
 * same "not found" value nkf_enc_find_index() uses) lets that switch reach
 * its default branch.  Note that this macro evaluates its argument twice;
 * pass a plain variable, not an expression with side effects.
 *
 * The other two accessors still require a non-null pointer.
 */
#define nkf_enc_name(enc) ((enc)->name)
#define nkf_enc_to_index(enc) ((enc) ? (enc)->id : -1)
#define nkf_enc_to_base_encoding(enc) ((enc)->based_encoding)
952
953 #ifdef WIN32DLL
954 #include "nkf32dll.c"
955 #elif defined(PERL_XS)
956 #else /* WIN32DLL */
957 int main(int argc, char **argv)
958 {
959     FILE  *fin;
960     unsigned char  *cp;
961
962     char *outfname = NULL;
963     char *origfname;
964
965 #ifdef EASYWIN /*Easy Win */
966     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
967 #endif
968
969     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
970         cp = (unsigned char *)*argv;
971         options(cp);
972         if (guess_f) {
973 #ifdef CHECK_OPTION
974             int debug_f_back = debug_f;
975 #endif
976 #ifdef EXEC_IO
977             int exec_f_back = exec_f;
978 #endif
979 #ifdef X0212_ENABLE
980             int x0212_f_back = x0212_f;
981 #endif
982             int x0213_f_back = x0213_f;
983             int guess_f_back = guess_f;
984             reinit();
985             guess_f = guess_f_back;
986             mime_f = FALSE;
987 #ifdef CHECK_OPTION
988             debug_f = debug_f_back;
989 #endif
990 #ifdef EXEC_IO
991             exec_f = exec_f_back;
992 #endif
993 #ifdef X0212_ENABLE
994             x0212_f = x0212_f_back;
995 #endif
996             x0213_f = x0213_f_back;
997         }
998 #ifdef EXEC_IO
999         if (exec_f){
1000             int fds[2], pid;
1001             if (pipe(fds) < 0 || (pid = fork()) < 0){
1002                 abort();
1003             }
1004             if (pid == 0){
1005                 if (exec_f > 0){
1006                     close(fds[0]);
1007                     dup2(fds[1], 1);
1008                 }else{
1009                     close(fds[1]);
1010                     dup2(fds[0], 0);
1011                 }
1012                 execvp(argv[1], &argv[1]);
1013             }
1014             if (exec_f > 0){
1015                 close(fds[1]);
1016                 dup2(fds[0], 0);
1017             }else{
1018                 close(fds[0]);
1019                 dup2(fds[1], 1);
1020             }
1021             argc = 0;
1022             break;
1023         }
1024 #endif
1025     }
1026
1027     if (binmode_f == TRUE)
1028 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1029     if (freopen("","wb",stdout) == NULL)
1030         return (-1);
1031 #else
1032     setbinmode(stdout);
1033 #endif
1034
1035     if (unbuf_f)
1036       setbuf(stdout, (char *) NULL);
1037     else
1038       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1039
1040     if (argc == 0) {
1041       if (binmode_f == TRUE)
1042 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1043       if (freopen("","rb",stdin) == NULL) return (-1);
1044 #else
1045       setbinmode(stdin);
1046 #endif
1047       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1048       if (nop_f)
1049           noconvert(stdin);
1050       else {
1051           kanji_convert(stdin);
1052           if (guess_f) print_guessed_code(NULL);
1053       }
1054     } else {
1055       int nfiles = argc;
1056         int is_argument_error = FALSE;
1057       while (argc--) {
1058             input_codename = NULL;
1059             input_newline = 0;
1060 #ifdef CHECK_OPTION
1061             iconv_for_check = 0;
1062 #endif
1063           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1064               perror(*--argv);
1065                 *argv++;
1066                 is_argument_error = TRUE;
1067                 continue;
1068           } else {
1069 #ifdef OVERWRITE
1070               int fd = 0;
1071               int fd_backup = 0;
1072 #endif
1073
1074 /* reopen file for stdout */
1075               if (file_out_f == TRUE) {
1076 #ifdef OVERWRITE
1077                   if (overwrite_f){
1078                       outfname = malloc(strlen(origfname)
1079                                         + strlen(".nkftmpXXXXXX")
1080                                         + 1);
1081                       if (!outfname){
1082                           perror(origfname);
1083                           return -1;
1084                       }
1085                       strcpy(outfname, origfname);
1086 #ifdef MSDOS
1087                       {
1088                           int i;
1089                           for (i = strlen(outfname); i; --i){
1090                               if (outfname[i - 1] == '/'
1091                                   || outfname[i - 1] == '\\'){
1092                                   break;
1093                               }
1094                           }
1095                           outfname[i] = '\0';
1096                       }
1097                       strcat(outfname, "ntXXXXXX");
1098                       mktemp(outfname);
1099                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1100                                 S_IREAD | S_IWRITE);
1101 #else
1102                       strcat(outfname, ".nkftmpXXXXXX");
1103                       fd = mkstemp(outfname);
1104 #endif
1105                       if (fd < 0
1106                           || (fd_backup = dup(fileno(stdout))) < 0
1107                           || dup2(fd, fileno(stdout)) < 0
1108                           ){
1109                           perror(origfname);
1110                           return -1;
1111                       }
1112                   }else
1113 #endif
1114                   if(argc == 1) {
1115                       outfname = *argv++;
1116                       argc--;
1117                   } else {
1118                       outfname = "nkf.out";
1119                   }
1120
1121                   if(freopen(outfname, "w", stdout) == NULL) {
1122                       perror (outfname);
1123                       return (-1);
1124                   }
1125                   if (binmode_f == TRUE) {
1126 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1127                       if (freopen("","wb",stdout) == NULL)
1128                            return (-1);
1129 #else
1130                       setbinmode(stdout);
1131 #endif
1132                   }
1133               }
1134               if (binmode_f == TRUE)
1135 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1136                  if (freopen("","rb",fin) == NULL)
1137                     return (-1);
1138 #else
1139                  setbinmode(fin);
1140 #endif
1141               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1142               if (nop_f)
1143                   noconvert(fin);
1144               else {
1145                   char *filename = NULL;
1146                   kanji_convert(fin);
1147                   if (nfiles > 1) filename = origfname;
1148                   if (guess_f) print_guessed_code(filename);
1149               }
1150               fclose(fin);
1151 #ifdef OVERWRITE
1152               if (overwrite_f) {
1153                   struct stat     sb;
1154 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1155                   time_t tb[2];
1156 #else
1157                   struct utimbuf  tb;
1158 #endif
1159
1160                   fflush(stdout);
1161                   close(fd);
1162                   if (dup2(fd_backup, fileno(stdout)) < 0){
1163                       perror("dup2");
1164                   }
1165                   if (stat(origfname, &sb)) {
1166                       fprintf(stderr, "Can't stat %s\n", origfname);
1167                   }
1168                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1169                   if (chmod(outfname, sb.st_mode)) {
1170                       fprintf(stderr, "Can't set permission %s\n", outfname);
1171                   }
1172
1173                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1174                     if(preserve_time_f){
1175 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1176                         tb[0] = tb[1] = sb.st_mtime;
1177                         if (utime(outfname, tb)) {
1178                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1179                         }
1180 #else
1181                         tb.actime  = sb.st_atime;
1182                         tb.modtime = sb.st_mtime;
1183                         if (utime(outfname, &tb)) {
1184                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1185                         }
1186 #endif
1187                     }
1188                     if(backup_f){
1189                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1190 #ifdef MSDOS
1191                         unlink(backup_filename);
1192 #endif
1193                         if (rename(origfname, backup_filename)) {
1194                             perror(backup_filename);
1195                             fprintf(stderr, "Can't rename %s to %s\n",
1196                                     origfname, backup_filename);
1197                         }
1198                     }else{
1199 #ifdef MSDOS
1200                         if (unlink(origfname)){
1201                             perror(origfname);
1202                         }
1203 #endif
1204                     }
1205                   if (rename(outfname, origfname)) {
1206                       perror(origfname);
1207                       fprintf(stderr, "Can't rename %s to %s\n",
1208                               outfname, origfname);
1209                   }
1210                   free(outfname);
1211               }
1212 #endif
1213           }
1214       }
1215         if (is_argument_error)
1216             return(-1);
1217     }
1218 #ifdef EASYWIN /*Easy Win */
1219     if (file_out_f == FALSE)
1220         scanf("%d",&end_check);
1221     else
1222         fclose(stdout);
1223 #else /* for Other OS */
1224     if (file_out_f == TRUE)
1225         fclose(stdout);
1226 #endif /*Easy Win */
1227     return (0);
1228 }
1229 #endif /* WIN32DLL */
1230
1231 #ifdef OVERWRITE
/*
 * Build the backup file name for filename from the pattern suffix.
 *
 * If suffix contains one or more '*' characters, each '*' is replaced by
 * filename and every other character is copied as is.  If it contains none,
 * the result is filename followed by suffix.
 *
 * Returns a malloc()ed string the caller must free(), or NULL (after
 * reporting through perror()) when memory cannot be allocated.
 *
 * The previous version allocated a single byte in the no-asterisk case and
 * then copied the whole name into it; the buffer is now sized from the
 * actual result length and the allocation is checked on both paths.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    size_t filename_length = strlen(filename);
    size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t result_length;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* every '*' is dropped and replaced by one copy of filename */
        result_length = (suffix_length - asterisk_count)
                        + asterisk_count * filename_length;
    } else {
        result_length = filename_length + suffix_length;
    }

    backup_filename = malloc(result_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminating NUL */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1270 #endif
1271
/*
 * Table of "--name" long options, searched in order by options().
 *
 * name  - option text after the leading "--"; a trailing '=' means the
 *         option is followed by a value.
 * alias - when non-empty, options() continues parsing this string as short
 *         option letters in place of the long option.  When empty, options()
 *         handles the option itself by comparing name.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=", ""},
    {"oc=", ""},
    {"base64","jMB"},
    {"euc","e"},
    {"euc-input","E"},
    {"fj","jm"},
    {"help","v"},
    {"jis","j"},
    {"jis-input","J"},
    {"mac","sLm"},
    {"mime","jM"},
    {"mime-input","m"},
    {"msdos","sLw"},
    {"sjis","s"},
    {"sjis-input","S"},
    {"unix","eLu"},
    {"version","V"},
    {"windows","sLw"},
    {"hiragana","h1"},
    {"katakana","h2"},
    {"katakana-hiragana","h3"},
    {"guess=", ""},
    {"guess", "g1"},
    {"cp932", ""},
    {"no-cp932", ""},
#ifdef X0212_ENABLE
    {"x0212", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8", "w"},
    {"utf16", "w16"},
    {"ms-ucs-map", ""},
    {"fb-skip", ""},
    {"fb-html", ""},
    {"fb-xml", ""},
    {"fb-perl", ""},
    {"fb-java", ""},
    {"fb-subchar", ""},
    {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input", "W"},
    {"utf16-input", "W16"},
    {"no-cp932ext", ""},
    {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
    {"overwrite", ""},
    {"overwrite=", ""},
    {"in-place", ""},
    {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input", ""},
    {"url-input", ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input", ""},
#endif
#ifdef CHECK_OPTION
    {"no-output", ""},
    {"debug", ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv", ""},
#endif
#ifdef EXEC_IO
    {"exec-in", ""},
    {"exec-out", ""},
#endif
    {"prefix=", ""},
};
1351
/* Set to 1 by options() once a bare "--" has been seen; while it is 1,
 * options() returns immediately without parsing its argument. */
static int option_mode = 0;
1353
1354 void options(unsigned char *cp)
1355 {
1356     nkf_char i, j;
1357     unsigned char *p;
1358     unsigned char *cp_back = NULL;
1359     char codeset[32];
1360     nkf_encoding *enc;
1361
1362     if (option_mode==1)
1363         return;
1364     while(*cp && *cp++!='-');
1365     while (*cp || cp_back) {
1366         if(!*cp){
1367             cp = cp_back;
1368             cp_back = NULL;
1369             continue;
1370         }
1371         p = 0;
1372         switch (*cp++) {
1373         case '-':  /* literal options */
1374             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1375                 option_mode = 1;
1376                 return;
1377             }
1378             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1379                 p = (unsigned char *)long_option[i].name;
1380                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1381                 if (*p == cp[j] || cp[j] == SP){
1382                     p = &cp[j] + 1;
1383                     break;
1384                 }
1385                 p = 0;
1386             }
1387             if (p == 0) {
1388                 fprintf(stderr, "unknown long option: --%s\n", cp);
1389                 return;
1390             }
1391             while(*cp && *cp != SP && cp++);
1392             if (long_option[i].alias[0]){
1393                 cp_back = cp;
1394                 cp = (unsigned char *)long_option[i].alias;
1395             }else{
1396                 if (strcmp(long_option[i].name, "ic=") == 0){
1397                     nkf_str_upcase(p, codeset, 32);
1398                     enc = nkf_enc_find(codeset);
1399                     switch (nkf_enc_to_index(enc)) {
1400                     case ISO_2022_JP:
1401                         input_f = JIS_INPUT;
1402                         break;
1403                     case CP50220:
1404                     case CP50221:
1405                     case CP50222:
1406                         input_f = JIS_INPUT;
1407 #ifdef SHIFTJIS_CP932
1408                         cp51932_f = TRUE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1411                         ms_ucs_map_f = UCS_MAP_CP932;
1412 #endif
1413                         break;
1414                     case ISO_2022_JP_1:
1415                         input_f = JIS_INPUT;
1416 #ifdef X0212_ENABLE
1417                         x0212_f = TRUE;
1418 #endif
1419                         break;
1420                     case ISO_2022_JP_3:
1421                         input_f = JIS_INPUT;
1422 #ifdef X0212_ENABLE
1423                         x0212_f = TRUE;
1424 #endif
1425                         x0213_f = TRUE;
1426                         break;
1427                     case SHIFT_JIS:
1428                         input_f = SJIS_INPUT;
1429                         break;
1430                     case WINDOWS_31J:
1431                         input_f = SJIS_INPUT;
1432 #ifdef SHIFTJIS_CP932
1433                         cp51932_f = TRUE;
1434 #endif
1435 #ifdef UTF8_OUTPUT_ENABLE
1436                         ms_ucs_map_f = UCS_MAP_CP932;
1437 #endif
1438                         break;
1439                     case CP10001:
1440                         input_f = SJIS_INPUT;
1441 #ifdef SHIFTJIS_CP932
1442                         cp51932_f = TRUE;
1443 #endif
1444 #ifdef UTF8_OUTPUT_ENABLE
1445                         ms_ucs_map_f = UCS_MAP_CP10001;
1446 #endif
1447                         break;
1448                     case EUC_JP:
1449                         input_f = EUC_INPUT;
1450                         break;
1451                     case CP51932:
1452                         input_f = EUC_INPUT;
1453 #ifdef SHIFTJIS_CP932
1454                         cp51932_f = TRUE;
1455 #endif
1456 #ifdef UTF8_OUTPUT_ENABLE
1457                         ms_ucs_map_f = UCS_MAP_CP932;
1458 #endif
1459                         break;
1460                     case EUCJP_MS:
1461                         input_f = EUC_INPUT;
1462 #ifdef SHIFTJIS_CP932
1463                         cp51932_f = FALSE;
1464 #endif
1465 #ifdef UTF8_OUTPUT_ENABLE
1466                         ms_ucs_map_f = UCS_MAP_MS;
1467 #endif
1468                         break;
1469                     case EUCJP_ASCII:
1470                         input_f = EUC_INPUT;
1471 #ifdef SHIFTJIS_CP932
1472                         cp51932_f = FALSE;
1473 #endif
1474 #ifdef UTF8_OUTPUT_ENABLE
1475                         ms_ucs_map_f = UCS_MAP_ASCII;
1476 #endif
1477                         break;
1478                     case SHIFT_JISX0213:
1479                     case SHIFT_JIS_2004:
1480                         input_f = SJIS_INPUT;
1481                         x0213_f = TRUE;
1482 #ifdef SHIFTJIS_CP932
1483                         cp51932_f = FALSE;
1484 #endif
1485                         break;
1486                     case EUC_JISX0213:
1487                     case EUC_JIS_2004:
1488                         input_f = EUC_INPUT;
1489                         x0213_f = TRUE;
1490 #ifdef SHIFTJIS_CP932
1491                         cp51932_f = FALSE;
1492 #endif
1493                         break;
1494 #ifdef UTF8_INPUT_ENABLE
1495                     case UTF_8:
1496                     case UTF_8N:
1497                     case UTF_8_BOM:
1498                         input_f = UTF8_INPUT;
1499                         break;
1500 #ifdef UNICODE_NORMALIZATION
1501                     case UTF8_MAC:
1502                         input_f = UTF8_INPUT;
1503                         nfc_f = TRUE;
1504                         break;
1505 #endif
1506                     case UTF_16:
1507                     case UTF_16BE:
1508                     case UTF_16BE_BOM:
1509                         input_f = UTF16_INPUT;
1510                         input_endian = ENDIAN_BIG;
1511                         break;
1512                     case UTF_16LE:
1513                     case UTF_16LE_BOM:
1514                         input_f = UTF16_INPUT;
1515                         input_endian = ENDIAN_LITTLE;
1516                         break;
1517                     case UTF_32:
1518                     case UTF_32BE:
1519                     case UTF_32BE_BOM:
1520                         input_f = UTF32_INPUT;
1521                         input_endian = ENDIAN_BIG;
1522                         break;
1523                     case UTF_32LE:
1524                     case UTF_32LE_BOM:
1525                         input_f = UTF32_INPUT;
1526                         input_endian = ENDIAN_LITTLE;
1527                         break;
1528 #endif
1529                     default:
1530                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1531                         break;
1532                     }
1533                     continue;
1534                 }
1535                 if (strcmp(long_option[i].name, "oc=") == 0){
1536                     x0201_f = FALSE;
1537                     nkf_str_upcase(p, codeset, 32);
1538                     output_encoding = nkf_enc_find(codeset);
1539                     switch (nkf_enc_to_index(output_encoding)) {
1540                     case ISO_2022_JP:
1541                         output_conv = j_oconv;
1542                         break;
1543                     case CP50220:
1544                             output_conv = j_oconv;
1545                             x0201_f = TRUE;
1546 #ifdef SHIFTJIS_CP932
1547                             cp932inv_f = FALSE;
1548 #endif
1549 #ifdef UTF8_OUTPUT_ENABLE
1550                             ms_ucs_map_f = UCS_MAP_CP932;
1551 #endif
1552                         break;
1553                     case CP50221:
1554                         output_conv = j_oconv;
1555 #ifdef SHIFTJIS_CP932
1556                         cp932inv_f = FALSE;
1557 #endif
1558 #ifdef UTF8_OUTPUT_ENABLE
1559                         ms_ucs_map_f = UCS_MAP_CP932;
1560 #endif
1561                         break;
1562                     case ISO_2022_JP_1:
1563                         output_conv = j_oconv;
1564 #ifdef X0212_ENABLE
1565                         x0212_f = TRUE;
1566 #endif
1567 #ifdef SHIFTJIS_CP932
1568                         cp932inv_f = FALSE;
1569 #endif
1570                         break;
1571                     case ISO_2022_JP_3:
1572                         output_conv = j_oconv;
1573 #ifdef X0212_ENABLE
1574                         x0212_f = TRUE;
1575 #endif
1576                         x0213_f = TRUE;
1577 #ifdef SHIFTJIS_CP932
1578                         cp932inv_f = FALSE;
1579 #endif
1580                         break;
1581                     case SHIFT_JIS:
1582                         output_conv = s_oconv;
1583                         break;
1584                     case WINDOWS_31J:
1585                         output_conv = s_oconv;
1586 #ifdef UTF8_OUTPUT_ENABLE
1587                         ms_ucs_map_f = UCS_MAP_CP932;
1588 #endif
1589                         break;
1590                     case CP10001:
1591                         output_conv = s_oconv;
1592 #ifdef UTF8_OUTPUT_ENABLE
1593                         ms_ucs_map_f = UCS_MAP_CP10001;
1594 #endif
1595                         break;
1596                     case EUC_JP:
1597                         output_conv = e_oconv;
1598                         break;
1599                     case CP51932:
1600                         output_conv = e_oconv;
1601 #ifdef SHIFTJIS_CP932
1602                         cp932inv_f = FALSE;
1603 #endif
1604 #ifdef UTF8_OUTPUT_ENABLE
1605                         ms_ucs_map_f = UCS_MAP_CP932;
1606 #endif
1607                         break;
1608                     case EUCJP_MS:
1609                         output_conv = e_oconv;
1610 #ifdef X0212_ENABLE
1611                         x0212_f = TRUE;
1612 #endif
1613 #ifdef UTF8_OUTPUT_ENABLE
1614                         ms_ucs_map_f = UCS_MAP_MS;
1615 #endif
1616                         break;
1617                     case EUCJP_ASCII:
1618                         output_conv = e_oconv;
1619 #ifdef X0212_ENABLE
1620                         x0212_f = TRUE;
1621 #endif
1622 #ifdef UTF8_OUTPUT_ENABLE
1623                         ms_ucs_map_f = UCS_MAP_ASCII;
1624 #endif
1625                         break;
1626                     case SHIFT_JISX0213:
1627                     case SHIFT_JIS_2004:
1628                             output_conv = s_oconv;
1629                             x0213_f = TRUE;
1630 #ifdef SHIFTJIS_CP932
1631                             cp932inv_f = FALSE;
1632 #endif
1633                         break;
1634                     case EUC_JISX0213:
1635                     case EUC_JIS_2004:
1636                         output_conv = e_oconv;
1637 #ifdef X0212_ENABLE
1638                         x0212_f = TRUE;
1639 #endif
1640                         x0213_f = TRUE;
1641 #ifdef SHIFTJIS_CP932
1642                         cp932inv_f = FALSE;
1643 #endif
1644                         break;
1645 #ifdef UTF8_OUTPUT_ENABLE
1646                     case UTF_8:
1647                     case UTF_8N:
1648                         output_conv = w_oconv;
1649                         break;
1650                     case UTF_8_BOM:
1651                         output_conv = w_oconv;
1652                         output_bom_f = TRUE;
1653                         break;
1654                     case UTF_16BE:
1655                         output_conv = w_oconv16;
1656                         break;
1657                     case UTF_16:
1658                     case UTF_16BE_BOM:
1659                         output_conv = w_oconv16;
1660                         output_bom_f = TRUE;
1661                         break;
1662                     case UTF_16LE:
1663                         output_conv = w_oconv16;
1664                         output_endian = ENDIAN_LITTLE;
1665                         break;
1666                     case UTF_16LE_BOM:
1667                         output_conv = w_oconv16;
1668                         output_endian = ENDIAN_LITTLE;
1669                         output_bom_f = TRUE;
1670                         break;
1671                     case UTF_32:
1672                     case UTF_32BE:
1673                         output_conv = w_oconv32;
1674                         break;
1675                     case UTF_32BE_BOM:
1676                         output_conv = w_oconv32;
1677                         output_bom_f = TRUE;
1678                         break;
1679                     case UTF_32LE:
1680                         output_conv = w_oconv32;
1681                         output_endian = ENDIAN_LITTLE;
1682                         break;
1683                     case UTF_32LE_BOM:
1684                         output_conv = w_oconv32;
1685                         output_endian = ENDIAN_LITTLE;
1686                         output_bom_f = TRUE;
1687                         break;
1688 #endif
1689                     default:
1690                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1691                         break;
1692                     }
1693                     continue;
1694                 }
1695                 if (strcmp(long_option[i].name, "guess=") == 0){
1696                     if (p[0] == '1') {
1697                         guess_f = 2;
1698                     } else {
1699                         guess_f = 1;
1700                     }
1701                     continue;
1702                 }
1703 #ifdef OVERWRITE
1704                 if (strcmp(long_option[i].name, "overwrite") == 0){
1705                     file_out_f = TRUE;
1706                     overwrite_f = TRUE;
1707                     preserve_time_f = TRUE;
1708                     continue;
1709                 }
1710                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1711                     file_out_f = TRUE;
1712                     overwrite_f = TRUE;
1713                     preserve_time_f = TRUE;
1714                     backup_f = TRUE;
1715                     backup_suffix = malloc(strlen((char *) p) + 1);
1716                     strcpy(backup_suffix, (char *) p);
1717                     continue;
1718                 }
1719                 if (strcmp(long_option[i].name, "in-place") == 0){
1720                     file_out_f = TRUE;
1721                     overwrite_f = TRUE;
1722                     preserve_time_f = FALSE;
1723                     continue;
1724                 }
1725                 if (strcmp(long_option[i].name, "in-place=") == 0){
1726                     file_out_f = TRUE;
1727                     overwrite_f = TRUE;
1728                     preserve_time_f = FALSE;
1729                     backup_f = TRUE;
1730                     backup_suffix = malloc(strlen((char *) p) + 1);
1731                     strcpy(backup_suffix, (char *) p);
1732                     continue;
1733                 }
1734 #endif
1735 #ifdef INPUT_OPTION
1736                 if (strcmp(long_option[i].name, "cap-input") == 0){
1737                     cap_f = TRUE;
1738                     continue;
1739                 }
1740                 if (strcmp(long_option[i].name, "url-input") == 0){
1741                     url_f = TRUE;
1742                     continue;
1743                 }
1744 #endif
1745 #ifdef NUMCHAR_OPTION
1746                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1747                     numchar_f = TRUE;
1748                     continue;
1749                 }
1750 #endif
1751 #ifdef CHECK_OPTION
1752                 if (strcmp(long_option[i].name, "no-output") == 0){
1753                     noout_f = TRUE;
1754                     continue;
1755                 }
1756                 if (strcmp(long_option[i].name, "debug") == 0){
1757                     debug_f = TRUE;
1758                     continue;
1759                 }
1760 #endif
1761                 if (strcmp(long_option[i].name, "cp932") == 0){
1762 #ifdef SHIFTJIS_CP932
1763                     cp51932_f = TRUE;
1764                     cp932inv_f = TRUE;
1765 #endif
1766 #ifdef UTF8_OUTPUT_ENABLE
1767                     ms_ucs_map_f = UCS_MAP_CP932;
1768 #endif
1769                     continue;
1770                 }
1771                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1772 #ifdef SHIFTJIS_CP932
1773                     cp51932_f = FALSE;
1774                     cp932inv_f = FALSE;
1775 #endif
1776 #ifdef UTF8_OUTPUT_ENABLE
1777                     ms_ucs_map_f = UCS_MAP_ASCII;
1778 #endif
1779                     continue;
1780                 }
1781 #ifdef SHIFTJIS_CP932
1782                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1783                     cp932inv_f = TRUE;
1784                     continue;
1785                 }
1786 #endif
1787
1788 #ifdef X0212_ENABLE
1789                 if (strcmp(long_option[i].name, "x0212") == 0){
1790                     x0212_f = TRUE;
1791                     continue;
1792                 }
1793 #endif
1794
1795 #ifdef EXEC_IO
1796                   if (strcmp(long_option[i].name, "exec-in") == 0){
1797                       exec_f = 1;
1798                       return;
1799                   }
1800                   if (strcmp(long_option[i].name, "exec-out") == 0){
1801                       exec_f = -1;
1802                       return;
1803                   }
1804 #endif
1805 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1806                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1807                     no_cp932ext_f = TRUE;
1808                     continue;
1809                 }
1810                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1811                     no_best_fit_chars_f = TRUE;
1812                     continue;
1813                 }
1814                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1815                     encode_fallback = NULL;
1816                     continue;
1817                 }
1818                 if (strcmp(long_option[i].name, "fb-html") == 0){
1819                     encode_fallback = encode_fallback_html;
1820                     continue;
1821                 }
1822                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1823                     encode_fallback = encode_fallback_xml;
1824                     continue;
1825                 }
1826                 if (strcmp(long_option[i].name, "fb-java") == 0){
1827                     encode_fallback = encode_fallback_java;
1828                     continue;
1829                 }
1830                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1831                     encode_fallback = encode_fallback_perl;
1832                     continue;
1833                 }
1834                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1835                     encode_fallback = encode_fallback_subchar;
1836                     continue;
1837                 }
1838                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1839                     encode_fallback = encode_fallback_subchar;
1840                     unicode_subchar = 0;
1841                     if (p[0] != '0'){
1842                         /* decimal number */
1843                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1844                             unicode_subchar *= 10;
1845                             unicode_subchar += hex2bin(p[i]);
1846                         }
1847                     }else if(p[1] == 'x' || p[1] == 'X'){
1848                         /* hexadecimal number */
1849                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1850                             unicode_subchar <<= 4;
1851                             unicode_subchar |= hex2bin(p[i]);
1852                         }
1853                     }else{
1854                         /* octal number */
1855                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1856                             unicode_subchar *= 8;
1857                             unicode_subchar += hex2bin(p[i]);
1858                         }
1859                     }
1860                     w16e_conv(unicode_subchar, &i, &j);
1861                     unicode_subchar = i<<8 | j;
1862                     continue;
1863                 }
1864 #endif
1865 #ifdef UTF8_OUTPUT_ENABLE
1866                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1867                     ms_ucs_map_f = UCS_MAP_MS;
1868                     continue;
1869                 }
1870 #endif
1871 #ifdef UNICODE_NORMALIZATION
1872                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1873                     input_f = UTF8_INPUT;
1874                     nfc_f = TRUE;
1875                     continue;
1876                 }
1877 #endif
1878                 if (strcmp(long_option[i].name, "prefix=") == 0){
1879                     if (nkf_isgraph(p[0])){
1880                         for (i = 1; nkf_isgraph(p[i]); i++){
1881                             prefix_table[p[i]] = p[0];
1882                         }
1883                     }
1884                     continue;
1885                 }
1886             }
1887             continue;
1888         case 'b':           /* buffered mode */
1889             unbuf_f = FALSE;
1890             continue;
1891         case 'u':           /* non bufferd mode */
1892             unbuf_f = TRUE;
1893             continue;
1894         case 't':           /* transparent mode */
1895             if (*cp=='1') {
1896                 /* alias of -t */
1897                 nop_f = TRUE;
1898                 *cp++;
1899             } else if (*cp=='2') {
1900                 /*
1901                  * -t with put/get
1902                  *
1903                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1904                  *
1905                  */
1906                 nop_f = 2;
1907                 *cp++;
1908             } else
1909                 nop_f = TRUE;
1910             continue;
1911         case 'j':           /* JIS output */
1912         case 'n':
1913             output_conv = j_oconv;
1914             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1915             continue;
1916         case 'e':           /* AT&T EUC output */
1917             output_conv = e_oconv;
1918             cp932inv_f = FALSE;
1919             output_encoding = nkf_enc_from_index(EUC_JP);
1920             continue;
1921         case 's':           /* SJIS output */
1922             output_conv = s_oconv;
1923             output_encoding = nkf_enc_from_index(SHIFT_JIS);
1924             continue;
1925         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1926             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1927             input_f = LATIN1_INPUT;
1928             continue;
1929         case 'i':           /* Kanji IN ESC-$-@/B */
1930             if (*cp=='@'||*cp=='B')
1931                 kanji_intro = *cp++;
1932             continue;
1933         case 'o':           /* ASCII IN ESC-(-J/B */
1934             if (*cp=='J'||*cp=='B'||*cp=='H')
1935                 ascii_intro = *cp++;
1936             continue;
1937         case 'h':
1938             /*
1939                 bit:1   katakana->hiragana
1940                 bit:2   hiragana->katakana
1941             */
1942             if ('9'>= *cp && *cp>='0')
1943                 hira_f |= (*cp++ -'0');
1944             else
1945                 hira_f |= 1;
1946             continue;
1947         case 'r':
1948             rot_f = TRUE;
1949             continue;
1950 #if defined(MSDOS) || defined(__OS2__)
1951         case 'T':
1952             binmode_f = FALSE;
1953             continue;
1954 #endif
1955 #ifndef PERL_XS
1956         case 'V':
1957             show_configuration();
1958             exit(1);
1959             break;
1960         case 'v':
1961             usage();
1962             exit(1);
1963             break;
1964 #endif
1965 #ifdef UTF8_OUTPUT_ENABLE
1966         case 'w':           /* UTF-8 output */
1967             if (cp[0] == '8') {
1968                 output_conv = w_oconv; cp++;
1969                 if (cp[0] == '0'){
1970                     cp++;
1971                     output_encoding = nkf_enc_from_index(UTF_8N);
1972                 } else {
1973                     output_bom_f = TRUE;
1974                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1975                 }
1976             } else {
1977                 int enc_idx;
1978                 if ('1'== cp[0] && '6'==cp[1]) {
1979                     output_conv = w_oconv16; cp+=2;
1980                     enc_idx = UTF_16;
1981                 } else if ('3'== cp[0] && '2'==cp[1]) {
1982                     output_conv = w_oconv32; cp+=2;
1983                     enc_idx = UTF_32;
1984                 } else {
1985                     output_conv = w_oconv;
1986                     output_encoding = nkf_enc_from_index(UTF_8);
1987                     continue;
1988                 }
1989                 if (cp[0]=='L') {
1990                     cp++;
1991                     output_endian = ENDIAN_LITTLE;
1992                 } else if (cp[0] == 'B') {
1993                     cp++;
1994                 } else {
1995                     output_encoding = nkf_enc_from_index(enc_idx);
1996                     continue;
1997                 }
1998                 if (cp[0] == '0'){
1999                     cp++;
2000                     enc_idx = enc_idx == UTF_16
2001                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
2002                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
2003                 } else {
2004                     output_bom_f = TRUE;
2005                     enc_idx = enc_idx == UTF_16
2006                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
2007                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
2008                 }
2009                 output_encoding = nkf_enc_from_index(enc_idx);
2010             }
2011             continue;
2012 #endif
2013 #ifdef UTF8_INPUT_ENABLE
2014         case 'W':           /* UTF input */
2015             if (cp[0] == '8') {
2016                 cp++;
2017                 input_f = UTF8_INPUT;
2018             }else{
2019                 if ('1'== cp[0] && '6'==cp[1]) {
2020                     cp += 2;
2021                     input_f = UTF16_INPUT;
2022                     input_endian = ENDIAN_BIG;
2023                 } else if ('3'== cp[0] && '2'==cp[1]) {
2024                     cp += 2;
2025                     input_f = UTF32_INPUT;
2026                     input_endian = ENDIAN_BIG;
2027                 } else {
2028                     input_f = UTF8_INPUT;
2029                     continue;
2030                 }
2031                 if (cp[0]=='L') {
2032                     cp++;
2033                     input_endian = ENDIAN_LITTLE;
2034                 } else if (cp[0] == 'B') {
2035                     cp++;
2036                 }
2037             }
2038             continue;
2039 #endif
2040         /* Input code assumption */
2041         case 'J':   /* JIS input */
2042             input_f = JIS_INPUT;
2043             continue;
2044         case 'E':   /* AT&T EUC input */
2045             input_f = EUC_INPUT;
2046             continue;
2047         case 'S':   /* MS Kanji input */
2048             input_f = SJIS_INPUT;
2049             continue;
2050         case 'Z':   /* Convert X0208 alphabet to asii */
2051             /* alpha_f
2052                bit:0   Convert JIS X 0208 Alphabet to ASCII
2053                bit:1   Convert Kankaku to one space
2054                bit:2   Convert Kankaku to two spaces
2055                bit:3   Convert HTML Entity
2056                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2057             */
2058             while ('0'<= *cp && *cp <='9') {
2059                 alpha_f |= 1 << (*cp++ - '0');
2060             }
2061             if (!alpha_f) alpha_f = 1;
2062             continue;
2063         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2064             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2065             /* accept  X0201
2066                     ESC-(-I     in JIS, EUC, MS Kanji
2067                     SI/SO       in JIS, EUC, MS Kanji
2068                     SSO         in EUC, JIS, not in MS Kanji
2069                     MS Kanji (0xa0-0xdf)
2070                output  X0201
2071                     ESC-(-I     in JIS (0x20-0x5f)
2072                     SSO         in EUC (0xa0-0xdf)
2073                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2074             */
2075             continue;
2076         case 'X':   /* Convert X0201 kana to X0208 */
2077             x0201_f = TRUE;
2078             continue;
2079         case 'F':   /* prserve new lines */
2080             fold_preserve_f = TRUE;
2081         case 'f':   /* folding -f60 or -f */
2082             fold_f = TRUE;
2083             fold_len = 0;
2084             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2085                 fold_len *= 10;
2086                 fold_len += *cp++ - '0';
2087             }
2088             if (!(0<fold_len && fold_len<BUFSIZ))
2089                 fold_len = DEFAULT_FOLD;
2090             if (*cp=='-') {
2091                 fold_margin = 0;
2092                 cp++;
2093                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2094                     fold_margin *= 10;
2095                     fold_margin += *cp++ - '0';
2096                 }
2097             }
2098             continue;
2099         case 'm':   /* MIME support */
2100             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2101             if (*cp=='B'||*cp=='Q') {
2102                 mime_decode_mode = *cp++;
2103                 mimebuf_f = FIXED_MIME;
2104             } else if (*cp=='N') {
2105                 mime_f = TRUE; cp++;
2106             } else if (*cp=='S') {
2107                 mime_f = STRICT_MIME; cp++;
2108             } else if (*cp=='0') {
2109                 mime_decode_f = FALSE;
2110                 mime_f = FALSE; cp++;
2111             }
2112             continue;
2113         case 'M':   /* MIME output */
2114             if (*cp=='B') {
2115                 mimeout_mode = 'B';
2116                 mimeout_f = FIXED_MIME; cp++;
2117             } else if (*cp=='Q') {
2118                 mimeout_mode = 'Q';
2119                 mimeout_f = FIXED_MIME; cp++;
2120             } else {
2121                 mimeout_f = TRUE;
2122             }
2123             continue;
2124         case 'B':   /* Broken JIS support */
2125             /*  bit:0   no ESC JIS
2126                 bit:1   allow any x on ESC-(-x or ESC-$-x
2127                 bit:2   reset to ascii on NL
2128             */
2129             if ('9'>= *cp && *cp>='0')
2130                 broken_f |= 1<<(*cp++ -'0');
2131             else
2132                 broken_f |= TRUE;
2133             continue;
2134 #ifndef PERL_XS
2135         case 'O':/* for Output file */
2136             file_out_f = TRUE;
2137             continue;
2138 #endif
2139         case 'c':/* add cr code */
2140             nlmode_f = CRLF;
2141             continue;
2142         case 'd':/* delete cr code */
2143             nlmode_f = LF;
2144             continue;
2145         case 'I':   /* ISO-2022-JP output */
2146             iso2022jp_f = TRUE;
2147             continue;
2148         case 'L':  /* line mode */
2149             if (*cp=='u') {         /* unix */
2150                 nlmode_f = LF; cp++;
2151             } else if (*cp=='m') { /* mac */
2152                 nlmode_f = CR; cp++;
2153             } else if (*cp=='w') { /* windows */
2154                 nlmode_f = CRLF; cp++;
2155             } else if (*cp=='0') { /* no conversion  */
2156                 nlmode_f = 0; cp++;
2157             }
2158             continue;
2159 #ifndef PERL_XS
2160         case 'g':
2161             if (*cp == '1') {
2162                 guess_f = 2;
2163                 cp++;
2164             } else if (*cp == '0') {
2165                 guess_f = 1;
2166                 cp++;
2167             } else {
2168                 guess_f = 1;
2169             }
2170             continue;
2171 #endif
2172         case SP:
2173         /* multiple options in a single string are allowed for the Perl module */
2174             while(*cp && *cp++!='-');
2175             continue;
2176         default:
2177             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2178             /* bogus option but ignored */
2179             continue;
2180         }
2181     }
2182 }
2183
2184 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2185 {
2186     if (iconv_func){
2187         struct input_code *p = input_code_list;
2188         while (p->name){
2189             if (iconv_func == p->iconv_func){
2190                 return p;
2191             }
2192             p++;
2193         }
2194     }
2195     return 0;
2196 }
2197
2198 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2199 {
2200 #ifdef INPUT_CODE_FIX
2201     if (f || !input_f)
2202 #endif
2203         if (estab_f != f){
2204             estab_f = f;
2205         }
2206
2207     if (iconv_func
2208 #ifdef INPUT_CODE_FIX
2209         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
2210 #endif
2211         ){
2212         iconv = iconv_func;
2213     }
2214 #ifdef CHECK_OPTION
2215     if (estab_f && iconv_for_check != iconv){
2216         struct input_code *p = find_inputcode_byfunc(iconv);
2217         if (p){
2218             set_input_codename(p->name);
2219             debug(p->name);
2220         }
2221         iconv_for_check = iconv;
2222     }
2223 #endif
2224 }
2225
/*
 * Bit flags that set_code_score() ORs into input_code.score.
 * Each macro is the previous one shifted left by one, so every flag
 * occupies its own bit; SCORE_ERROR is the highest (1 << 7 == 128).
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* character that does not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score a candidate starts with (see status_reset()). */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score flags for lead bytes whose bits 4-6 are 0x20, indexed by the low
 * nibble of the lead byte (see code_score()).
 *
 * The element type is unsigned char on purpose: SCORE_ERROR is 128, which
 * does not fit in plain char where char is signed and would be stored as a
 * negative value, turning on every high bit when OR-ed into the score.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score flags for lead bytes whose bits 4-6 are 0x70, indexed by the low
 * nibble of the lead byte (see code_score()).  The last entry holds
 * SCORE_ERROR, hence the unsigned element type.
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2250
2251 void set_code_score(struct input_code *ptr, nkf_char score)
2252 {
2253     if (ptr){
2254         ptr->score |= score;
2255     }
2256 }
2257
2258 void clr_code_score(struct input_code *ptr, nkf_char score)
2259 {
2260     if (ptr){
2261         ptr->score &= ~score;
2262     }
2263 }
2264
2265 void code_score(struct input_code *ptr)
2266 {
2267     nkf_char c2 = ptr->buf[0];
2268 #ifdef UTF8_OUTPUT_ENABLE
2269     nkf_char c1 = ptr->buf[1];
2270 #endif
2271     if (c2 < 0){
2272         set_code_score(ptr, SCORE_ERROR);
2273     }else if (c2 == SSO){
2274         set_code_score(ptr, SCORE_KANA);
2275     }else if (c2 == 0x8f){
2276         set_code_score(ptr, SCORE_X0212);
2277 #ifdef UTF8_OUTPUT_ENABLE
2278     }else if (!e2w_conv(c2, c1)){
2279         set_code_score(ptr, SCORE_NO_EXIST);
2280 #endif
2281     }else if ((c2 & 0x70) == 0x20){
2282         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2283     }else if ((c2 & 0x70) == 0x70){
2284         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2285     }else if ((c2 & 0x70) >= 0x50){
2286         set_code_score(ptr, SCORE_L2);
2287     }
2288 }
2289
2290 void status_disable(struct input_code *ptr)
2291 {
2292     ptr->stat = -1;
2293     ptr->buf[0] = -1;
2294     code_score(ptr);
2295     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2296 }
2297
2298 void status_push_ch(struct input_code *ptr, nkf_char c)
2299 {
2300     ptr->buf[ptr->index++] = c;
2301 }
2302
2303 void status_clear(struct input_code *ptr)
2304 {
2305     ptr->stat = 0;
2306     ptr->index = 0;
2307 }
2308
2309 void status_reset(struct input_code *ptr)
2310 {
2311     status_clear(ptr);
2312     ptr->score = SCORE_INIT;
2313 }
2314
2315 void status_reinit(struct input_code *ptr)
2316 {
2317     status_reset(ptr);
2318     ptr->_file_stat = 0;
2319 }
2320
2321 void status_check(struct input_code *ptr, nkf_char c)
2322 {
2323     if (c <= DEL && estab_f){
2324         status_reset(ptr);
2325     }
2326 }
2327
2328 void s_status(struct input_code *ptr, nkf_char c)
2329 {
2330     switch(ptr->stat){
2331       case -1:
2332           status_check(ptr, c);
2333           break;
2334       case 0:
2335           if (c <= DEL){
2336               break;
2337 #ifdef NUMCHAR_OPTION
2338           }else if (is_unicode_capsule(c)){
2339               break;
2340 #endif
2341           }else if (0xa1 <= c && c <= 0xdf){
2342               status_push_ch(ptr, SSO);
2343               status_push_ch(ptr, c);
2344               code_score(ptr);
2345               status_clear(ptr);
2346           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2347               ptr->stat = 1;
2348               status_push_ch(ptr, c);
2349           }else if (0xed <= c && c <= 0xee){
2350               ptr->stat = 3;
2351               status_push_ch(ptr, c);
2352 #ifdef SHIFTJIS_CP932
2353           }else if (is_ibmext_in_sjis(c)){
2354               ptr->stat = 2;
2355               status_push_ch(ptr, c);
2356 #endif /* SHIFTJIS_CP932 */
2357 #ifdef X0212_ENABLE
2358           }else if (0xf0 <= c && c <= 0xfc){
2359               ptr->stat = 1;
2360               status_push_ch(ptr, c);
2361 #endif /* X0212_ENABLE */
2362           }else{
2363               status_disable(ptr);
2364           }
2365           break;
2366       case 1:
2367           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2368               status_push_ch(ptr, c);
2369               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2370               code_score(ptr);
2371               status_clear(ptr);
2372           }else{
2373               status_disable(ptr);
2374           }
2375           break;
2376       case 2:
2377 #ifdef SHIFTJIS_CP932
2378         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2379             status_push_ch(ptr, c);
2380             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2381                 set_code_score(ptr, SCORE_CP932);
2382                 status_clear(ptr);
2383                 break;
2384             }
2385         }
2386 #endif /* SHIFTJIS_CP932 */
2387         status_disable(ptr);
2388           break;
2389       case 3:
2390           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2391               status_push_ch(ptr, c);
2392               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2393             set_code_score(ptr, SCORE_CP932);
2394             status_clear(ptr);
2395           }else{
2396               status_disable(ptr);
2397           }
2398           break;
2399     }
2400 }
2401
2402 void e_status(struct input_code *ptr, nkf_char c)
2403 {
2404     switch (ptr->stat){
2405       case -1:
2406           status_check(ptr, c);
2407           break;
2408       case 0:
2409           if (c <= DEL){
2410               break;
2411 #ifdef NUMCHAR_OPTION
2412           }else if (is_unicode_capsule(c)){
2413               break;
2414 #endif
2415           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2416               ptr->stat = 1;
2417               status_push_ch(ptr, c);
2418 #ifdef X0212_ENABLE
2419           }else if (0x8f == c){
2420               ptr->stat = 2;
2421               status_push_ch(ptr, c);
2422 #endif /* X0212_ENABLE */
2423           }else{
2424               status_disable(ptr);
2425           }
2426           break;
2427       case 1:
2428           if (0xa1 <= c && c <= 0xfe){
2429               status_push_ch(ptr, c);
2430               code_score(ptr);
2431               status_clear(ptr);
2432           }else{
2433               status_disable(ptr);
2434           }
2435           break;
2436 #ifdef X0212_ENABLE
2437       case 2:
2438           if (0xa1 <= c && c <= 0xfe){
2439               ptr->stat = 1;
2440               status_push_ch(ptr, c);
2441           }else{
2442               status_disable(ptr);
2443           }
2444 #endif /* X0212_ENABLE */
2445     }
2446 }
2447
2448 #ifdef UTF8_INPUT_ENABLE
2449 void w_status(struct input_code *ptr, nkf_char c)
2450 {
2451     switch (ptr->stat){
2452       case -1:
2453           status_check(ptr, c);
2454           break;
2455       case 0:
2456           if (c <= DEL){
2457               break;
2458 #ifdef NUMCHAR_OPTION
2459           }else if (is_unicode_capsule(c)){
2460               break;
2461 #endif
2462           }else if (0xc0 <= c && c <= 0xdf){
2463               ptr->stat = 1;
2464               status_push_ch(ptr, c);
2465           }else if (0xe0 <= c && c <= 0xef){
2466               ptr->stat = 2;
2467               status_push_ch(ptr, c);
2468           }else if (0xf0 <= c && c <= 0xf4){
2469               ptr->stat = 3;
2470               status_push_ch(ptr, c);
2471           }else{
2472               status_disable(ptr);
2473           }
2474           break;
2475       case 1:
2476       case 2:
2477           if (0x80 <= c && c <= 0xbf){
2478               status_push_ch(ptr, c);
2479               if (ptr->index > ptr->stat){
2480                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2481                              && ptr->buf[2] == 0xbf);
2482                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2483                            &ptr->buf[0], &ptr->buf[1]);
2484                   if (!bom){
2485                       code_score(ptr);
2486                   }
2487                   status_clear(ptr);
2488               }
2489           }else{
2490               status_disable(ptr);
2491           }
2492           break;
2493       case 3:
2494         if (0x80 <= c && c <= 0xbf){
2495             if (ptr->index < ptr->stat){
2496                 status_push_ch(ptr, c);
2497             } else {
2498                 status_clear(ptr);
2499             }
2500           }else{
2501               status_disable(ptr);
2502           }
2503           break;
2504     }
2505 }
2506 #endif
2507
2508 void code_status(nkf_char c)
2509 {
2510     int action_flag = 1;
2511     struct input_code *result = 0;
2512     struct input_code *p = input_code_list;
2513     while (p->name){
2514         if (!p->status_func) {
2515             ++p;
2516             continue;
2517         }
2518         if (!p->status_func)
2519             continue;
2520         (p->status_func)(p, c);
2521         if (p->stat > 0){
2522             action_flag = 0;
2523         }else if(p->stat == 0){
2524             if (result){
2525                 action_flag = 0;
2526             }else{
2527                 result = p;
2528             }
2529         }
2530         ++p;
2531     }
2532
2533     if (action_flag){
2534         if (result && !estab_f){
2535             set_iconv(TRUE, result->iconv_func);
2536         }else if (c <= DEL){
2537             struct input_code *ptr = input_code_list;
2538             while (ptr->name){
2539                 status_reset(ptr);
2540                 ++ptr;
2541             }
2542         }
2543     }
2544 }
2545
2546 #ifndef WIN32DLL
2547 nkf_char std_getc(FILE *f)
2548 {
2549     if (std_gc_ndx){
2550         return std_gc_buf[--std_gc_ndx];
2551     }
2552     return getc(f);
2553 }
2554 #endif /*WIN32DLL*/
2555
2556 nkf_char std_ungetc(nkf_char c, FILE *f)
2557 {
2558     if (std_gc_ndx == STD_GC_BUFSIZE){
2559         return EOF;
2560     }
2561     std_gc_buf[std_gc_ndx++] = c;
2562     return c;
2563 }
2564
2565 #ifndef WIN32DLL
2566 void std_putc(nkf_char c)
2567 {
2568     if(c!=EOF)
2569       putchar(c);
2570 }
2571 #endif /*WIN32DLL*/
2572
2573 #if !defined(PERL_XS) && !defined(WIN32DLL)
2574 nkf_char noconvert(FILE *f)
2575 {
2576     nkf_char    c;
2577
2578     if (nop_f == 2)
2579         module_connection();
2580     while ((c = (*i_getc)(f)) != EOF)
2581       (*o_putc)(c);
2582     (*o_putc)(EOF);
2583     return 1;
2584 }
2585 #endif
2586
2587 void module_connection(void)
2588 {
2589     oconv = output_conv;
2590     o_putc = std_putc;
2591
2592     /* replace continuation module, from output side */
2593
2594     /* output redirection */
2595 #ifdef CHECK_OPTION
2596     if (noout_f || guess_f){
2597         o_putc = no_putc;
2598     }
2599 #endif
2600     if (mimeout_f) {
2601         o_mputc = o_putc;
2602         o_putc = mime_putc;
2603         if (mimeout_f == TRUE) {
2604             o_base64conv = oconv; oconv = base64_conv;
2605         }
2606         /* base64_count = 0; */
2607     }
2608
2609     if (nlmode_f || guess_f) {
2610         o_nlconv = oconv; oconv = nl_conv;
2611     }
2612     if (rot_f) {
2613         o_rot_conv = oconv; oconv = rot_conv;
2614     }
2615     if (iso2022jp_f) {
2616         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2617     }
2618     if (hira_f) {
2619         o_hira_conv = oconv; oconv = hira_conv;
2620     }
2621     if (fold_f) {
2622         o_fconv = oconv; oconv = fold_conv;
2623         f_line = 0;
2624     }
2625     if (alpha_f || x0201_f) {
2626         o_zconv = oconv; oconv = z_conv;
2627     }
2628
2629     i_getc = std_getc;
2630     i_ungetc = std_ungetc;
2631     /* input redicrection */
2632 #ifdef INPUT_OPTION
2633     if (cap_f){
2634         i_cgetc = i_getc; i_getc = cap_getc;
2635         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2636     }
2637     if (url_f){
2638         i_ugetc = i_getc; i_getc = url_getc;
2639         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2640     }
2641 #endif
2642 #ifdef NUMCHAR_OPTION
2643     if (numchar_f){
2644         i_ngetc = i_getc; i_getc = numchar_getc;
2645         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2646     }
2647 #endif
2648 #ifdef UNICODE_NORMALIZATION
2649     if (nfc_f && input_f == UTF8_INPUT){
2650         i_nfc_getc = i_getc; i_getc = nfc_getc;
2651         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2652     }
2653 #endif
2654     if (mime_f && mimebuf_f==FIXED_MIME) {
2655         i_mgetc = i_getc; i_getc = mime_getc;
2656         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2657     }
2658     if (broken_f & 1) {
2659         i_bgetc = i_getc; i_getc = broken_getc;
2660         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2661     }
2662     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2663         set_iconv(-TRUE, e_iconv);
2664     } else if (input_f == SJIS_INPUT) {
2665         set_iconv(-TRUE, s_iconv);
2666 #ifdef UTF8_INPUT_ENABLE
2667     } else if (input_f == UTF8_INPUT) {
2668         set_iconv(-TRUE, w_iconv);
2669     } else if (input_f == UTF16_INPUT) {
2670         set_iconv(-TRUE, w_iconv16);
2671     } else if (input_f == UTF32_INPUT) {
2672         set_iconv(-TRUE, w_iconv32);
2673 #endif
2674     } else {
2675         set_iconv(FALSE, e_iconv);
2676     }
2677
2678     {
2679         struct input_code *p = input_code_list;
2680         while (p->name){
2681             status_reinit(p++);
2682         }
2683     }
2684 }
2685
2686 /*
2687  * Check and Ignore BOM
2688  */
2689 void check_bom(FILE *f)
2690 {
2691     int c2;
2692     switch(c2 = (*i_getc)(f)){
2693     case 0x00:
2694         if((c2 = (*i_getc)(f)) == 0x00){
2695             if((c2 = (*i_getc)(f)) == 0xFE){
2696                 if((c2 = (*i_getc)(f)) == 0xFF){
2697                     if(!input_f){
2698                         set_iconv(TRUE, w_iconv32);
2699                     }
2700                     if (iconv == w_iconv32) {
2701                         input_endian = ENDIAN_BIG;
2702                         return;
2703                     }
2704                     (*i_ungetc)(0xFF,f);
2705                 }else (*i_ungetc)(c2,f);
2706                 (*i_ungetc)(0xFE,f);
2707             }else if(c2 == 0xFF){
2708                 if((c2 = (*i_getc)(f)) == 0xFE){
2709                     if(!input_f){
2710                         set_iconv(TRUE, w_iconv32);
2711                     }
2712                     if (iconv == w_iconv32) {
2713                         input_endian = ENDIAN_2143;
2714                         return;
2715                     }
2716                     (*i_ungetc)(0xFF,f);
2717                 }else (*i_ungetc)(c2,f);
2718                 (*i_ungetc)(0xFF,f);
2719             }else (*i_ungetc)(c2,f);
2720             (*i_ungetc)(0x00,f);
2721         }else (*i_ungetc)(c2,f);
2722         (*i_ungetc)(0x00,f);
2723         break;
2724     case 0xEF:
2725         if((c2 = (*i_getc)(f)) == 0xBB){
2726             if((c2 = (*i_getc)(f)) == 0xBF){
2727                 if(!input_f){
2728                     set_iconv(TRUE, w_iconv);
2729                 }
2730                 if (iconv == w_iconv) {
2731                     return;
2732                 }
2733                 (*i_ungetc)(0xBF,f);
2734             }else (*i_ungetc)(c2,f);
2735             (*i_ungetc)(0xBB,f);
2736         }else (*i_ungetc)(c2,f);
2737         (*i_ungetc)(0xEF,f);
2738         break;
2739     case 0xFE:
2740         if((c2 = (*i_getc)(f)) == 0xFF){
2741             if((c2 = (*i_getc)(f)) == 0x00){
2742                 if((c2 = (*i_getc)(f)) == 0x00){
2743                     if(!input_f){
2744                         set_iconv(TRUE, w_iconv32);
2745                     }
2746                     if (iconv == w_iconv32) {
2747                         input_endian = ENDIAN_3412;
2748                         return;
2749                     }
2750                     (*i_ungetc)(0x00,f);
2751                 }else (*i_ungetc)(c2,f);
2752                 (*i_ungetc)(0x00,f);
2753             }else (*i_ungetc)(c2,f);
2754             if(!input_f){
2755                 set_iconv(TRUE, w_iconv16);
2756             }
2757             if (iconv == w_iconv16) {
2758                 input_endian = ENDIAN_BIG;
2759                 return;
2760             }
2761             (*i_ungetc)(0xFF,f);
2762         }else (*i_ungetc)(c2,f);
2763         (*i_ungetc)(0xFE,f);
2764         break;
2765     case 0xFF:
2766         if((c2 = (*i_getc)(f)) == 0xFE){
2767             if((c2 = (*i_getc)(f)) == 0x00){
2768                 if((c2 = (*i_getc)(f)) == 0x00){
2769                     if(!input_f){
2770                         set_iconv(TRUE, w_iconv32);
2771                     }
2772                     if (iconv == w_iconv32) {
2773                         input_endian = ENDIAN_LITTLE;
2774                         return;
2775                     }
2776                     (*i_ungetc)(0x00,f);
2777                 }else (*i_ungetc)(c2,f);
2778                 (*i_ungetc)(0x00,f);
2779             }else (*i_ungetc)(c2,f);
2780             if(!input_f){
2781                 set_iconv(TRUE, w_iconv16);
2782             }
2783             if (iconv == w_iconv16) {
2784                 input_endian = ENDIAN_LITTLE;
2785                 return;
2786             }
2787             (*i_ungetc)(0xFE,f);
2788         }else (*i_ungetc)(c2,f);
2789         (*i_ungetc)(0xFF,f);
2790         break;
2791     default:
2792         (*i_ungetc)(c2,f);
2793         break;
2794     }
2795 }
2796
2797 /*
2798    Conversion main loop. Code detection only.
2799  */
2800
/*
 * kanji_convert: read the whole of `f` through i_getc and feed it to the
 * selected input converter (*iconv) or directly to (*oconv).
 *
 * Set-up: is_8bit is set when input_f names SJIS, EUC or (with
 * UTF8_INPUT_ENABLE) UTF-8 / UTF-16; input_mode, output_mode and
 * shift_mode are reset; module_connection() and check_bom(f) are called.
 *
 * Main loop: c1 is the character just read and c2 holds a pending first
 * byte (0 when none).  Each character is first passed to code_status()
 * (only while input_f is zero when INPUT_CODE_FIX is defined).  The long
 * if/else chain then either stores the byte and reads on (NEXT), stops
 * (LAST), or falls through (SEND) to the switch on input_mode at the
 * bottom of the loop, which hands c2/c1/c0 to iconv or oconv.
 *
 * Epilogue: (*iconv)(EOF, 0, 0) is called, and when no input code name
 * has been recorded and is_8bit is set, the input_code_list entry with
 * the lowest score is recorded through set_input_codename().
 *
 * Returns 1 (the only other return statement, `return 0` in the
 * JIS_X_0208 case, looks unreachable - see the note there).
 */
2801 nkf_char kanji_convert(FILE *f)
2802 {
2803     nkf_char    c3, c2=0, c1, c0=0;
2804     int is_8bit = FALSE;
2805
2806     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2807 #ifdef UTF8_INPUT_ENABLE
2808        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2809 #endif
2810       ){
2811         is_8bit = TRUE;
2812     }
2813
2814     input_mode = ASCII;
2815     output_mode = ASCII;
2816     shift_mode = FALSE;
2817
         /* Loop-control shorthands used below.  SEND expands to an empty
          * statement, so a SEND branch simply leaves the if/else chain and
          * reaches the switch(input_mode) dispatch at the end of the loop
          * body. */
2818 #define NEXT continue      /* no output, get next */
2819 #define SEND ;             /* output c1 and c2, get next */
2820 #define LAST break         /* end of loop, go closing  */
2821
2822     module_connection();
2823     check_bom(f);
2824
2825     while ((c1 = (*i_getc)(f)) != EOF) {
2826 #ifdef INPUT_CODE_FIX
2827         if (!input_f)
2828 #endif
2829             code_status(c1);
2830         if (c2) {
2831             /* second byte */
2832             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2833                 /* in case of 8th bit is on */
2834                 if (!estab_f&&!mime_decode_mode) {
2835                     /* in case of not established yet */
2836                     /* It is still ambiguous */
2837                     if (h_conv(f, c2, c1)==EOF)
2838                         LAST;
2839                     else
2840                         c2 = 0;
2841                     NEXT;
2842                 } else {
2843                     /* in case of already established */
2844                     if (c1 < AT) {
2845                         /* ignore bogus code and not CP5022x UCD */
2846                         c2 = 0;
2847                         NEXT;
2848                     } else {
2849                         SEND;
2850                     }
2851                 }
2852             } else
2853                 /* second byte, 7 bit code */
2854                 /* it might be kanji shifted */
2855                 if ((c1 == DEL) || (c1 <= SP)) {
2856                     /* ignore bogus first code */
2857                     c2 = 0;
2858                     NEXT;
2859                 } else
2860                     SEND;
2861         } else {
2862             /* first byte */
2863 #ifdef UTF8_INPUT_ENABLE
                 /* UTF-16: c2 receives the high-order byte and c1 the
                  * low-order byte of the 16-bit unit.  When c2 is in
                  * 0xD8..0xDB two more bytes are read and combined into
                  * c0; running out of input sets c2 to EOF. */
2864             if (iconv == w_iconv16) {
2865                 if (input_endian == ENDIAN_BIG) {
2866                     c2 = c1;
2867                     if ((c1 = (*i_getc)(f)) != EOF) {
2868                         if (0xD8 <= c2 && c2 <= 0xDB) {
2869                             if ((c0 = (*i_getc)(f)) != EOF) {
2870                                 c0 <<= 8;
2871                                 if ((c3 = (*i_getc)(f)) != EOF) {
2872                                     c0 |= c3;
2873                                 } else c2 = EOF;
2874                             } else c2 = EOF;
2875                         }
2876                     } else c2 = EOF;
2877                 } else {
2878                     if ((c2 = (*i_getc)(f)) != EOF) {
2879                         if (0xD8 <= c2 && c2 <= 0xDB) {
2880                             if ((c3 = (*i_getc)(f)) != EOF) {
2881                                 if ((c0 = (*i_getc)(f)) != EOF) {
2882                                     c0 <<= 8;
2883                                     c0 |= c3;
2884                                 } else c2 = EOF;
2885                             } else c2 = EOF;
2886                         }
2887                     } else c2 = EOF;
2888                 }
2889                 SEND;
2890             } else if(iconv == w_iconv32){
                     /* UTF-32: four bytes are read and the lower three
                      * bytes of the code point are assembled into c1
                      * according to input_endian; c2 becomes 0, or EOF
                      * when the input ends early.
                      * NOTE: this c3 is an int local to the branch and
                      * hides the function-level c3; it keeps the first
                      * of the four bytes while c1 is reused. */
2891                 int c3 = c1;
2892                 if((c2 = (*i_getc)(f)) != EOF &&
2893                    (c1 = (*i_getc)(f)) != EOF &&
2894                    (c0 = (*i_getc)(f)) != EOF){
2895                     switch(input_endian){
2896                     case ENDIAN_BIG:
2897                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2898                         break;
2899                     case ENDIAN_LITTLE:
2900                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2901                         break;
2902                     case ENDIAN_2143:
2903                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2904                         break;
2905                     case ENDIAN_3412:
2906                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2907                         break;
2908                     }
2909                     c2 = 0;
2910                 }else{
2911                     c2 = EOF;
2912                 }
2913                 SEND;
2914             } else
2915 #endif
2916 #ifdef NUMCHAR_OPTION
2917             if (is_unicode_capsule(c1)){
2918                 SEND;
2919             } else
2920 #endif
2921             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2922                 /* 8 bit code */
2923                 if (!estab_f && !iso8859_f) {
2924                     /* not established yet */
2925                     c2 = c1;
2926                     NEXT;
2927                 } else { /* estab_f==TRUE */
2928                     if (iso8859_f) {
2929                         c2 = ISO_8859_1;
2930                         c1 &= 0x7f;
2931                         SEND;
2932                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2933                         /* SJIS X0201 Case... */
2934                         if (iso2022jp_f && !x0201_f) {
2935                             (*oconv)(GETA1, GETA2);
2936                             NEXT;
2937                         } else {
2938                             c2 = JIS_X_0201;
2939                             c1 &= 0x7f;
2940                             SEND;
2941                         }
2942                     } else if (c1==SSO && iconv != s_iconv) {
2943                         /* EUC X0201 Case */
2944                         c1 = (*i_getc)(f);  /* skip SSO */
2945                         code_status(c1);
2946                         if (SSP<=c1 && c1<0xe0) {
2947                             if (iso2022jp_f && !x0201_f) {
2948                                 (*oconv)(GETA1, GETA2);
2949                                 NEXT;
2950                             } else {
2951                                 c2 = JIS_X_0201;
2952                                 c1 &= 0x7f;
2953                                 SEND;
2954                             }
2955                         } else  { /* bogus code, skip SSO and one byte */
2956                             NEXT;
2957                         }
2958                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2959                                (c1 == 0xFD || c1 == 0xFE)) {
2960                         /* CP10001 */
2961                         c2 = JIS_X_0201;
2962                         c1 &= 0x7f;
2963                         SEND;
2964                     } else {
2965                        /* already established */
2966                        c2 = c1;
2967                        NEXT;
2968                     }
2969                 }
2970             } else if ((c1 > SP) && (c1 != DEL)) {
2971                 /* in case of Roman characters */
2972                 if (shift_mode) {
2973                     /* output 1 shifted byte */
2974                     if (iso8859_f) {
2975                         c2 = ISO_8859_1;
2976                         SEND;
2977                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2978                       /* output 1 shifted byte */
2979                         if (iso2022jp_f && !x0201_f) {
2980                             (*oconv)(GETA1, GETA2);
2981                             NEXT;
2982                         } else {
2983                             c2 = JIS_X_0201;
2984                             SEND;
2985                         }
2986                     } else {
2987                         /* look like bogus code */
2988                         NEXT;
2989                     }
2990                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2991                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2992                     /* in case of Kanji shifted */
2993                     c2 = c1;
2994                     NEXT;
2995                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2996                     /* Check MIME code */
2997                     if ((c1 = (*i_getc)(f)) == EOF) {
2998                         (*oconv)(0, '=');
2999                         LAST;
3000                     } else if (c1 == '?') {
3001                         /* =? is mime conversion start sequence */
3002                         if(mime_f == STRICT_MIME) {
3003                             /* check in real detail */
3004                             if (mime_begin_strict(f) == EOF)
3005                                 LAST;
3006                             else
3007                                 NEXT;
3008                         } else if (mime_begin(f) == EOF)
3009                             LAST;
3010                         else
3011                             NEXT;
3012                     } else {
3013                         (*oconv)(0, '=');
3014                         (*i_ungetc)(c1,f);
3015                         NEXT;
3016                     }
3017                 } else {
3018                     /* normal ASCII code */
3019                     SEND;
3020                 }
3021             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
3022                 shift_mode = FALSE;
3023                 NEXT;
3024             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
3025                 shift_mode = TRUE;
3026                 NEXT;
3027             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* Escape sequence handling: the bytes after ESC
                      * select a new input_mode / shift_mode; sequences
                      * that are not recognised are passed on to oconv
                      * byte by byte. */
3028                 if ((c1 = (*i_getc)(f)) == EOF) {
3029                     /*  (*oconv)(0, ESC); don't send bogus code */
3030                     LAST;
3031                 } else if (c1 == '$') {
3032                     if ((c1 = (*i_getc)(f)) == EOF) {
3033                         /*
3034                         (*oconv)(0, ESC); don't send bogus code
3035                         (*oconv)(0, '$'); */
3036                         LAST;
3037                     } else if (c1 == '@'|| c1 == 'B') {
3038                         /* This is kanji introduction */
3039                         input_mode = JIS_X_0208;
3040                         shift_mode = FALSE;
3041                         set_input_codename("ISO-2022-JP");
3042 #ifdef CHECK_OPTION
3043                         debug("ISO-2022-JP");
3044 #endif
3045                         NEXT;
3046                     } else if (c1 == '(') {
3047                         if ((c1 = (*i_getc)(f)) == EOF) {
3048                             /* don't send bogus code
3049                             (*oconv)(0, ESC);
3050                             (*oconv)(0, '$');
3051                             (*oconv)(0, '(');
3052                                 */
3053                             LAST;
3054                         } else if (c1 == '@'|| c1 == 'B') {
3055                             /* This is kanji introduction */
3056                             input_mode = JIS_X_0208;
3057                             shift_mode = FALSE;
3058                             NEXT;
3059 #ifdef X0212_ENABLE
3060                         } else if (c1 == 'D'){
3061                             input_mode = JIS_X_0212;
3062                             shift_mode = FALSE;
3063                             NEXT;
3064 #endif /* X0212_ENABLE */
3065                         } else if (c1 == 0x4F){
3066                             input_mode = JIS_X_0213_1;
3067                             shift_mode = FALSE;
3068                             NEXT;
3069                         } else if (c1 == 0x50){
3070                             input_mode = JIS_X_0213_2;
3071                             shift_mode = FALSE;
3072                             NEXT;
3073                         } else {
3074                             /* could be some special code */
3075                             (*oconv)(0, ESC);
3076                             (*oconv)(0, '$');
3077                             (*oconv)(0, '(');
3078                             (*oconv)(0, c1);
3079                             NEXT;
3080                         }
3081                     } else if (broken_f&0x2) {
3082                         /* accept any ESC-(-x as broken code ... */
3083                         input_mode = JIS_X_0208;
3084                         shift_mode = FALSE;
3085                         NEXT;
3086                     } else {
3087                         (*oconv)(0, ESC);
3088                         (*oconv)(0, '$');
3089                         (*oconv)(0, c1);
3090                         NEXT;
3091                     }
3092                 } else if (c1 == '(') {
3093                     if ((c1 = (*i_getc)(f)) == EOF) {
3094                         /* don't send bogus code
3095                         (*oconv)(0, ESC);
3096                         (*oconv)(0, '('); */
3097                         LAST;
3098                     } else {
3099                         if (c1 == 'I') {
3100                             /* This is X0201 kana introduction */
3101                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3102                             NEXT;
3103                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3104                             /* ESC ( B / J / H: leave kanji mode, input_mode goes back to ASCII */
3105                             input_mode = ASCII; shift_mode = FALSE;
3106                             NEXT;
3107                         } else if (broken_f&0x2) {
3108                             input_mode = ASCII; shift_mode = FALSE;
3109                             NEXT;
3110                         } else {
3111                             (*oconv)(0, ESC);
3112                             (*oconv)(0, '(');
3113                             /* maintain various input_mode here */
3114                             SEND;
3115                         }
3116                     }
3117                } else if ( c1 == 'N' || c1 == 'n'){
3118                    /* SS2 */
3119                    c3 = (*i_getc)(f);  /* skip SS2 */
3120                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3121                        c1 = c3;
3122                        c2 = JIS_X_0201;
3123                        SEND;
3124                    }else{
3125                        (*i_ungetc)(c3, f);
3126                        /* lonely ESC  */
3127                        (*oconv)(0, ESC);
3128                        SEND;
3129                    }
3130                 } else {
3131                     /* lonely ESC  */
3132                     (*oconv)(0, ESC);
3133                     SEND;
3134                 }
3135             } else if (c1 == ESC && iconv == s_iconv) {
3136                 /* ESC in Shift_JIS */
3137                 if ((c1 = (*i_getc)(f)) == EOF) {
3138                     /*  (*oconv)(0, ESC); don't send bogus code */
3139                     LAST;
3140                 } else if (c1 == '$') {
3141                     /* J-PHONE emoji */
3142                     if ((c1 = (*i_getc)(f)) == EOF) {
3143                         /*
3144                            (*oconv)(0, ESC); don't send bogus code
3145                            (*oconv)(0, '$'); */
3146                         LAST;
3147                     } else {
3148                         if (('E' <= c1 && c1 <= 'G') ||
3149                             ('O' <= c1 && c1 <= 'Q')) {
3150                             /*
3151                                NUM : 0 1 2 3 4 5
3152                                BYTE: G E F O P Q
3153                                C%7 : 1 6 0 2 3 4
3154                                C%7 : 0 1 2 3 4 5 6
3155                                NUM : 2 0 3 4 5 X 1
3156                              */
3157                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3158                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
                                 /* every following byte in SP..'z' is sent
                                  * as c1 + c0; the first byte outside that
                                  * range ends the run and is not sent */
3159                             while ((c1 = (*i_getc)(f)) != EOF) {
3160                                 if (SP <= c1 && c1 <= 'z') {
3161                                     (*oconv)(0, c1 + c0);
3162                                 } else break; /* c1 == SO */
3163                             }
3164                         }
3165                     }
3166                     if (c1 == EOF) LAST;
3167                     NEXT;
3168                 } else {
3169                     /* lonely ESC  */
3170                     (*oconv)(0, ESC);
3171                     SEND;
3172                 }
3173             } else if (c1 == LF || c1 == CR) {
3174                 if (broken_f&4) {
3175                     input_mode = ASCII; set_iconv(FALSE, 0);
3176                     SEND;
3177                 } else if (mime_decode_f && !mime_decode_mode){
                         /* A line break directly followed by SP is
                          * dropped: SP is pushed back and the loop
                          * restarts without sending the line break. */
3178                     if (c1 == LF) {
3179                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3180                             i_ungetc(SP,f);
3181                             continue;
3182                         } else {
3183                             i_ungetc(c1,f);
3184                         }
3185                         c1 = LF;
3186                         SEND;
3187                     } else  { /* if (c1 == CR)*/
3188                         if ((c1=(*i_getc)(f))!=EOF) {
3189                             if (c1==SP) {
3190                                 i_ungetc(SP,f);
3191                                 continue;
3192                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3193                                 i_ungetc(SP,f);
3194                                 continue;
3195                             } else {
3196                                 i_ungetc(c1,f);
3197                             }
                                 /* NOTE(review): LF is pushed back here even
                                  * when the byte read after CR was not LF -
                                  * confirm this is intended. */
3198                             i_ungetc(LF,f);
3199                         } else {
3200                             i_ungetc(c1,f);
3201                         }
3202                         c1 = CR;
3203                         SEND;
3204                     }
3205                 }
3206             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3207                 /* CP5022x */
3208                 c2 = c1;
3209                 NEXT;
3210             } else
3211                 SEND;
3212         }
3213         /* send: */
             /* Every SEND above ends up here: the pair (c2, c1) plus c0 is
              * handed on according to the current input_mode. */
3214         switch(input_mode){
3215         case ASCII:
                 /* a return value of -2 or -1 from iconv makes the loop read
                  * two or one more byte(s) and call iconv again */
3216             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3217             case -2:
3218                 /* 4 bytes UTF-8 */
3219                 if ((c0 = (*i_getc)(f)) != EOF) {
3220                     code_status(c0);
3221                     c0 <<= 8;
3222                     if ((c3 = (*i_getc)(f)) != EOF) {
3223                         code_status(c3);
3224                         (*iconv)(c2, c1, c0|c3);
3225                     }
3226                 }
3227                 break;
3228             case -1:
3229                 /* 3 bytes EUC or UTF-8 */
3230                 if ((c0 = (*i_getc)(f)) != EOF) {
3231                     code_status(c0);
3232                     (*iconv)(c2, c1, c0);
3233                 }
3234                 break;
3235             }
3236             break;
3237         case JIS_X_0208:
3238         case JIS_X_0213_1:
3239             if (ms_ucs_map_f &&
3240                 0x7F <= c2 && c2 <= 0x92 &&
3241                 0x21 <= c1 && c1 <= 0x7E) {
3242                 /* CP932 UDC */
                     /* NOTE(review): c1 <= 0x7E is guaranteed by the
                      * enclosing condition, so the test below can never
                      * be true. */
3243                 if(c1 == 0x7F) return 0;
3244                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3245                 c2 = 0;
3246             }
3247             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3248             break;
3249 #ifdef X0212_ENABLE
3250         case JIS_X_0212:
3251             (*oconv)(PREFIX_EUCG3 | c2, c1);
3252             break;
3253 #endif /* X0212_ENABLE */
3254         case JIS_X_0213_2:
3255             (*oconv)(PREFIX_EUCG3 | c2, c1);
3256             break;
3257         default:
3258             (*oconv)(input_mode, c1);  /* other special case */
3259         }
3260
3261         c2 = 0;
3262         c0 = 0;
3263         continue;
3264         /* goto next_word */
3265     }
3266
3267     /* epilogue */
         /* iconv is called once more with c2 == EOF (presumably an
          * end-of-input signal for the converter - confirm in the
          * converters).  If no input code name was recorded and is_8bit
          * is set, the input_code_list entry with the lowest score is
          * recorded. */
3268     (*iconv)(EOF, 0, 0);
3269     if (!input_codename)
3270     {
3271         if (is_8bit) {
3272             struct input_code *p = input_code_list;
3273             struct input_code *result = p;
3274             while (p->name){
3275                 if (p->score < result->score) result = p;
3276                 ++p;
3277             }
3278             set_input_codename(result->name);
3279 #ifdef CHECK_OPTION
3280             debug(result->name);
3281 #endif
3282         }
3283     }
3284     return 1;
3285 }
3286
3287 nkf_char
3288 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3289 {
3290     nkf_char ret, c3, c0;
3291     int hold_index;
3292
3293
3294     /** it must NOT be in the kanji shifte sequence      */
3295     /** it must NOT be written in JIS7                   */
3296     /** and it must be after 2 byte 8bit code            */
3297
3298     hold_count = 0;
3299     push_hold_buf(c2);
3300     push_hold_buf(c1);
3301
3302     while ((c1 = (*i_getc)(f)) != EOF) {
3303         if (c1 == ESC){
3304             (*i_ungetc)(c1,f);
3305             break;
3306         }
3307         code_status(c1);
3308         if (push_hold_buf(c1) == EOF || estab_f){
3309             break;
3310         }
3311     }
3312
3313     if (!estab_f){
3314         struct input_code *p = input_code_list;
3315         struct input_code *result = p;
3316         if (c1 == EOF){
3317             code_status(c1);
3318         }
3319         while (p->name){
3320             if (p->status_func && p->score < result->score){
3321                 result = p;
3322             }
3323             ++p;
3324         }
3325         set_iconv(TRUE, result->iconv_func);
3326     }
3327
3328
3329     /** now,
3330      ** 1) EOF is detected, or
3331      ** 2) Code is established, or
3332      ** 3) Buffer is FULL (but last word is pushed)
3333      **
3334      ** in 1) and 3) cases, we continue to use
3335      ** Kanji codes by oconv and leave estab_f unchanged.
3336      **/
3337
3338     ret = c1;
3339     hold_index = 0;
3340     while (hold_index < hold_count){
3341         c2 = hold_buf[hold_index++];
3342         if (c2 <= DEL
3343 #ifdef NUMCHAR_OPTION
3344             || is_unicode_capsule(c2)
3345 #endif
3346             ){
3347             (*iconv)(0, c2, 0);
3348             continue;
3349         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3350             (*iconv)(JIS_X_0201, c2, 0);
3351             continue;
3352         }
3353         if (hold_index < hold_count){
3354             c1 = hold_buf[hold_index++];
3355         }else{
3356             c1 = (*i_getc)(f);
3357             if (c1 == EOF){
3358                 c3 = EOF;
3359                 break;
3360             }
3361             code_status(c1);
3362         }
3363         c0 = 0;
3364         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3365         case -2:
3366             /* 4 bytes UTF-8 */
3367             if (hold_index < hold_count){
3368                 c0 = hold_buf[hold_index++];
3369             } else if ((c0 = (*i_getc)(f)) == EOF) {
3370                 ret = EOF;
3371                 break;
3372             } else {
3373                 code_status(c0);
3374                 c0 <<= 8;
3375                 if (hold_index < hold_count){
3376                     c3 = hold_buf[hold_index++];
3377                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3378                     c0 = ret = EOF;
3379                     break;
3380                 } else {
3381                     code_status(c3);
3382                     (*iconv)(c2, c1, c0|c3);
3383                 }
3384             }
3385             break;
3386         case -1:
3387             /* 3 bytes EUC or UTF-8 */
3388             if (hold_index < hold_count){
3389                 c0 = hold_buf[hold_index++];
3390             } else if ((c0 = (*i_getc)(f)) == EOF) {
3391                 ret = EOF;
3392                 break;
3393             } else {
3394                 code_status(c0);
3395             }
3396             (*iconv)(c2, c1, c0);
3397             break;
3398         }
3399         if (c0 == EOF) break;
3400     }
3401     return ret;
3402 }
3403
3404 nkf_char push_hold_buf(nkf_char c2)
3405 {
3406     if (hold_count >= HOLD_SIZE*2)
3407         return (EOF);
3408     hold_buf[hold_count++] = (unsigned char)c2;
3409     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3410 }
3411
3412 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3413 {
3414 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3415     nkf_char val;
3416 #endif
3417     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3418 #ifdef SHIFTJIS_CP932
3419     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3420         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3421         if (val){
3422             c2 = val >> 8;
3423             c1 = val & 0xff;
3424         }
3425     }
3426     if (cp932inv_f
3427         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3428         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3429         if (c){
3430             c2 = c >> 8;
3431             c1 = c & 0xff;
3432         }
3433     }
3434 #endif /* SHIFTJIS_CP932 */
3435 #ifdef X0212_ENABLE
3436     if (!x0213_f && is_ibmext_in_sjis(c2)){
3437         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3438         if (val){
3439             if (val > 0x7FFF){
3440                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3441                 c1 = val & 0xff;
3442             }else{
3443                 c2 = val >> 8;
3444                 c1 = val & 0xff;
3445             }
3446             if (p2) *p2 = c2;
3447             if (p1) *p1 = c1;
3448             return 0;
3449         }
3450     }
3451 #endif
3452     if(c2 >= 0x80){
3453         if(x0213_f && c2 >= 0xF0){
3454             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3455                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3456             }else{ /* 78<=k<=94 */
3457                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3458                 if (0x9E < c1) c2++;
3459             }
3460         }else{
3461             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3462             if (0x9E < c1) c2++;
3463         }
3464         if (c1 < 0x9F)
3465             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3466         else {
3467             c1 = c1 - 0x7E;
3468         }
3469     }
3470
3471 #ifdef X0212_ENABLE
3472     c2 = x0212_unshift(c2);
3473 #endif
3474     if (p2) *p2 = c2;
3475     if (p1) *p1 = c1;
3476     return 0;
3477 }
3478
3479 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3480 {
3481     if (c2 == JIS_X_0201) {
3482         c1 &= 0x7f;
3483     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3484         /* NOP */
3485     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3486         /* CP932 UDC */
3487         if(c1 == 0x7F) return 0;
3488         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3489         c2 = 0;
3490     } else {
3491         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3492         if (ret) return ret;
3493     }
3494     (*oconv)(c2, c1);
3495     return 0;
3496 }
3497
3498 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3499 {
3500     if (c2 == JIS_X_0201) {
3501         c1 &= 0x7f;
3502 #ifdef X0212_ENABLE
3503     }else if (c2 == 0x8f){
3504         if (c0 == 0){
3505             return -1;
3506         }
3507         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3508             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3509             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3510             c2 = 0;
3511         } else {
3512             c2 = (c2 << 8) | (c1 & 0x7f);
3513             c1 = c0 & 0x7f;
3514 #ifdef SHIFTJIS_CP932
3515             if (cp51932_f){
3516                 nkf_char s2, s1;
3517                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3518                     s2e_conv(s2, s1, &c2, &c1);
3519                     if (c2 < 0x100){
3520                         c1 &= 0x7f;
3521                         c2 &= 0x7f;
3522                     }
3523                 }
3524             }
3525 #endif /* SHIFTJIS_CP932 */
3526         }
3527 #endif /* X0212_ENABLE */
3528     } else if (c2 == SSO){
3529         c2 = JIS_X_0201;
3530         c1 &= 0x7f;
3531     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3532         /* NOP */
3533     } else {
3534         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3535             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3536             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3537             c2 = 0;
3538         } else {
3539             c1 &= 0x7f;
3540             c2 &= 0x7f;
3541 #ifdef SHIFTJIS_CP932
3542             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3543                 nkf_char s2, s1;
3544                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3545                     s2e_conv(s2, s1, &c2, &c1);
3546                     if (c2 < 0x100){
3547                         c1 &= 0x7f;
3548                         c2 &= 0x7f;
3549                     }
3550                 }
3551             }
3552 #endif /* SHIFTJIS_CP932 */
3553         }
3554     }
3555     (*oconv)(c2, c1);
3556     return 0;
3557 }
3558
3559 #ifdef UTF8_INPUT_ENABLE
3560 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3561 {
3562     nkf_char ret = 0;
3563
3564     if (!c1){
3565         *p2 = 0;
3566         *p1 = c2;
3567     }else if (0xc0 <= c2 && c2 <= 0xef) {
3568         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3569 #ifdef NUMCHAR_OPTION
3570         if (ret > 0){
3571             if (p2) *p2 = 0;
3572             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3573             ret = 0;
3574         }
3575 #endif
3576     }
3577     return ret;
3578 }
3579
3580 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3581 {
3582     nkf_char ret = 0;
3583     static const char w_iconv_utf8_1st_byte[] =
3584     { /* 0xC0 - 0xFF */
3585         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3586         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3587         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3588         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3589
3590     if (c2 < 0 || 0xff < c2) {
3591     }else if (c2 == 0) { /* 0 : 1 byte*/
3592         c0 = 0;
3593     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3594         return 0;
3595     } else{
3596         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3597         case 21:
3598             if (c1 < 0x80 || 0xBF < c1) return 0;
3599             break;
3600         case 30:
3601             if (c0 == 0) return -1;
3602             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3603                 return 0;
3604             break;
3605         case 31:
3606         case 33:
3607             if (c0 == 0) return -1;
3608             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3609                 return 0;
3610             break;
3611         case 32:
3612             if (c0 == 0) return -1;
3613             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3614                 return 0;
3615             break;
3616         case 40:
3617             if (c0 == 0) return -2;
3618             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3619                 return 0;
3620             break;
3621         case 41:
3622             if (c0 == 0) return -2;
3623             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3624                 return 0;
3625             break;
3626         case 42:
3627             if (c0 == 0) return -2;
3628             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3629                 return 0;
3630             break;
3631         default:
3632             return 0;
3633             break;
3634         }
3635     }
3636     if (c2 == 0 || c2 == EOF){
3637     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3638         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3639         c2 = 0;
3640     } else {
3641         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3642     }
3643     if (ret == 0){
3644         (*oconv)(c2, c1);
3645     }
3646     return ret;
3647 }
3648 #endif
3649
3650 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3651 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3652 {
3653     val &= VALUE_MASK;
3654     if (val < 0x80){
3655         *p2 = val;
3656         *p1 = 0;
3657         *p0 = 0;
3658     }else if (val < 0x800){
3659         *p2 = 0xc0 | (val >> 6);
3660         *p1 = 0x80 | (val & 0x3f);
3661         *p0 = 0;
3662     } else if (val <= NKF_INT32_C(0xFFFF)) {
3663         *p2 = 0xe0 | (val >> 12);
3664         *p1 = 0x80 | ((val >> 6) & 0x3f);
3665         *p0 = 0x80 | (val        & 0x3f);
3666     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3667         *p2 = 0xe0 |  (val >> 16);
3668         *p1 = 0x80 | ((val >> 12) & 0x3f);
3669         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3670     } else {
3671         *p2 = 0;
3672         *p1 = 0;
3673         *p0 = 0;
3674     }
3675 }
3676 #endif
3677
3678 #ifdef UTF8_INPUT_ENABLE
3679 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3680 {
3681     nkf_char val;
3682     if (c2 >= 0xf8) {
3683         val = -1;
3684     } else if (c2 >= 0xf0){
3685         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3686         val = (c2 & 0x0f) << 18;
3687         val |= (c1 & 0x3f) << 12;
3688         val |= (c0 & 0x3f00) >> 2;
3689         val |= (c0 & 0x3f);
3690     }else if (c2 >= 0xe0){
3691         val = (c2 & 0x0f) << 12;
3692         val |= (c1 & 0x3f) << 6;
3693         val |= (c0 & 0x3f);
3694     }else if (c2 >= 0xc0){
3695         val = (c2 & 0x1f) << 6;
3696         val |= (c1 & 0x3f);
3697     }else{
3698         val = c2;
3699     }
3700     return val;
3701 }
3702
3703 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3704 {
3705     nkf_char c2, c1, c0;
3706     nkf_char ret = 0;
3707     val &= VALUE_MASK;
3708     if (val < 0x80){
3709         *p2 = 0;
3710         *p1 = val;
3711     }else{
3712         w16w_conv(val, &c2, &c1, &c0);
3713         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3714 #ifdef NUMCHAR_OPTION
3715         if (ret > 0){
3716             *p2 = 0;
3717             *p1 = CLASS_UNICODE | val;
3718             ret = 0;
3719         }
3720 #endif
3721     }
3722     return ret;
3723 }
3724 #endif
3725
3726 #ifdef UTF8_INPUT_ENABLE
3727 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3728 {
3729     nkf_char ret = 0;
3730     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3731         (*oconv)(c2, c1);
3732         return 0;
3733     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3734         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3735             return -2;
3736         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3737         c2 = 0;
3738     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3739         /*
3740            return 2;
3741         */
3742         return 1;
3743     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3744     if (ret) return ret;
3745     (*oconv)(c2, c1);
3746     return 0;
3747 }
3748
3749 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3750 {
3751     int ret = 0;
3752
3753     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3754     } else if (is_unicode_bmp(c1)) {
3755         ret = w16e_conv(c1, &c2, &c1);
3756     } else {
3757         c2 = 0;
3758         c1 =  CLASS_UNICODE | c1;
3759     }
3760     if (ret) return ret;
3761     (*oconv)(c2, c1);
3762     return 0;
3763 }
3764
3765 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3766 {
3767     const unsigned short *const *pp;
3768     const unsigned short *const *const *ppp;
3769     static const char no_best_fit_chars_table_C2[] =
3770     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3771         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3772         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3773         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3774     static const char no_best_fit_chars_table_C2_ms[] =
3775     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3776         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3777         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3778         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3779     static const char no_best_fit_chars_table_932_C2[] =
3780     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3781         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3782         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3783         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3784     static const char no_best_fit_chars_table_932_C3[] =
3785     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3786         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3787         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3788         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3789     nkf_char ret = 0;
3790
3791     if(c2 < 0x80){
3792         *p2 = 0;
3793         *p1 = c2;
3794     }else if(c2 < 0xe0){
3795         if(no_best_fit_chars_f){
3796             if(ms_ucs_map_f == UCS_MAP_CP932){
3797                 switch(c2){
3798                 case 0xC2:
3799                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3800                     break;
3801                 case 0xC3:
3802                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3803                     break;
3804                 }
3805             }else if(!cp932inv_f){
3806                 switch(c2){
3807                 case 0xC2:
3808                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3809                     break;
3810                 case 0xC3:
3811                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3812                     break;
3813                 }
3814             }else if(ms_ucs_map_f == UCS_MAP_MS){
3815                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3816             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3817                 switch(c2){
3818                 case 0xC2:
3819                     switch(c1){
3820                     case 0xA2:
3821                     case 0xA3:
3822                     case 0xA5:
3823                     case 0xA6:
3824                     case 0xAC:
3825                     case 0xAF:
3826                     case 0xB8:
3827                         return 1;
3828                     }
3829                     break;
3830                 }
3831             }
3832         }
3833         pp =
3834             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3835             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3836             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3837             utf8_to_euc_2bytes;
3838         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3839     }else if(c0 < 0xF0){
3840         if(no_best_fit_chars_f){
3841             if(ms_ucs_map_f == UCS_MAP_CP932){
3842                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3843             }else if(ms_ucs_map_f == UCS_MAP_MS){
3844                 switch(c2){
3845                 case 0xE2:
3846                     switch(c1){
3847                     case 0x80:
3848                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3849                         break;
3850                     case 0x88:
3851                         if(c0 == 0x92) return 1;
3852                         break;
3853                     }
3854                     break;
3855                 case 0xE3:
3856                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3857                     break;
3858                 }
3859             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3860                 switch(c2){
3861                 case 0xE3:
3862                     switch(c1){
3863                     case 0x82:
3864                             if(c0 == 0x94) return 1;
3865                         break;
3866                     case 0x83:
3867                             if(c0 == 0xBB) return 1;
3868                         break;
3869                     }
3870                     break;
3871                 }
3872             }else{
3873                 switch(c2){
3874                 case 0xE2:
3875                     switch(c1){
3876                     case 0x80:
3877                         if(c0 == 0x95) return 1;
3878                         break;
3879                     case 0x88:
3880                         if(c0 == 0xA5) return 1;
3881                         break;
3882                     }
3883                     break;
3884                 case 0xEF:
3885                     switch(c1){
3886                     case 0xBC:
3887                         if(c0 == 0x8D) return 1;
3888                         break;
3889                     case 0xBD:
3890                         if(c0 == 0x9E && !cp932inv_f) return 1;
3891                         break;
3892                     case 0xBF:
3893                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3894                         break;
3895                     }
3896                     break;
3897                 }
3898             }
3899         }
3900         ppp =
3901             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3902             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3903             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3904             utf8_to_euc_3bytes;
3905         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3906     }else return -1;
3907 #ifdef SHIFTJIS_CP932
3908     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3909         nkf_char s2, s1;
3910         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3911             s2e_conv(s2, s1, p2, p1);
3912         }else{
3913             ret = 1;
3914         }
3915     }
3916 #endif
3917     return ret;
3918 }
3919
3920 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3921 {
3922     nkf_char c2;
3923     const unsigned short *p;
3924     unsigned short val;
3925
3926     if (pp == 0) return 1;
3927
3928     c1 -= 0x80;
3929     if (c1 < 0 || psize <= c1) return 1;
3930     p = pp[c1];
3931     if (p == 0)  return 1;
3932
3933     c0 -= 0x80;
3934     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3935     val = p[c0];
3936     if (val == 0) return 1;
3937     if (no_cp932ext_f && (
3938         (val>>8) == 0x2D || /* NEC special characters */
3939         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3940         )) return 1;
3941
3942     c2 = val >> 8;
3943    if (val > 0x7FFF){
3944         c2 &= 0x7f;
3945         c2 |= PREFIX_EUCG3;
3946     }
3947     if (c2 == SO) c2 = JIS_X_0201;
3948     c1 = val & 0x7f;
3949     if (p2) *p2 = c2;
3950     if (p1) *p1 = c1;
3951     return 0;
3952 }
3953
3954 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3955 {
3956     int shift = 20;
3957     c &= VALUE_MASK;
3958     while(shift >= 0){
3959         if(c >= 1<<shift){
3960             while(shift >= 0){
3961                 (*f)(0, bin2hex(c>>shift));
3962                 shift -= 4;
3963             }
3964         }else{
3965             shift -= 4;
3966         }
3967     }
3968     return;
3969 }
3970
3971 void encode_fallback_html(nkf_char c)
3972 {
3973     (*oconv)(0, '&');
3974     (*oconv)(0, '#');
3975     c &= VALUE_MASK;
3976     if(c >= NKF_INT32_C(1000000))
3977         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3978     if(c >= NKF_INT32_C(100000))
3979         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3980     if(c >= 10000)
3981         (*oconv)(0, 0x30+(c/10000  )%10);
3982     if(c >= 1000)
3983         (*oconv)(0, 0x30+(c/1000   )%10);
3984     if(c >= 100)
3985         (*oconv)(0, 0x30+(c/100    )%10);
3986     if(c >= 10)
3987         (*oconv)(0, 0x30+(c/10     )%10);
3988     if(c >= 0)
3989         (*oconv)(0, 0x30+ c         %10);
3990     (*oconv)(0, ';');
3991     return;
3992 }
3993
3994 void encode_fallback_xml(nkf_char c)
3995 {
3996     (*oconv)(0, '&');
3997     (*oconv)(0, '#');
3998     (*oconv)(0, 'x');
3999     nkf_each_char_to_hex(oconv, c);
4000     (*oconv)(0, ';');
4001     return;
4002 }
4003
4004 void encode_fallback_java(nkf_char c)
4005 {
4006     (*oconv)(0, '\\');
4007     c &= VALUE_MASK;
4008     if(!is_unicode_bmp(c)){
4009         (*oconv)(0, 'U');
4010         (*oconv)(0, '0');
4011         (*oconv)(0, '0');
4012         (*oconv)(0, bin2hex(c>>20));
4013         (*oconv)(0, bin2hex(c>>16));
4014     }else{
4015         (*oconv)(0, 'u');
4016     }
4017     (*oconv)(0, bin2hex(c>>12));
4018     (*oconv)(0, bin2hex(c>> 8));
4019     (*oconv)(0, bin2hex(c>> 4));
4020     (*oconv)(0, bin2hex(c    ));
4021     return;
4022 }
4023
4024 void encode_fallback_perl(nkf_char c)
4025 {
4026     (*oconv)(0, '\\');
4027     (*oconv)(0, 'x');
4028     (*oconv)(0, '{');
4029     nkf_each_char_to_hex(oconv, c);
4030     (*oconv)(0, '}');
4031     return;
4032 }
4033
4034 void encode_fallback_subchar(nkf_char c)
4035 {
4036     c = unicode_subchar;
4037     (*oconv)((c>>8)&0xFF, c&0xFF);
4038     return;
4039 }
4040 #endif
4041
4042 #ifdef UTF8_OUTPUT_ENABLE
4043 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
4044 {
4045     const unsigned short *p;
4046
4047     if (c2 == JIS_X_0201) {
4048         if (ms_ucs_map_f == UCS_MAP_CP10001) {
4049             switch (c1) {
4050             case 0x20:
4051                 return 0xA0;
4052             case 0x7D:
4053                 return 0xA9;
4054             }
4055         }
4056         p = euc_to_utf8_1byte;
4057 #ifdef X0212_ENABLE
4058     } else if (is_eucg3(c2)){
4059         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4060             return 0xA6;
4061         }
4062         c2 = (c2&0x7f) - 0x21;
4063         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4064             p = x0212_to_utf8_2bytes[c2];
4065         else
4066             return 0;
4067 #endif
4068     } else {
4069         c2 &= 0x7f;
4070         c2 = (c2&0x7f) - 0x21;
4071         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4072             p =
4073                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4074                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4075                 euc_to_utf8_2bytes_ms[c2];
4076         else
4077             return 0;
4078     }
4079     if (!p) return 0;
4080     c1 = (c1 & 0x7f) - 0x21;
4081     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4082         return p[c1];
4083     return 0;
4084 }
4085
4086 void w_oconv(nkf_char c2, nkf_char c1)
4087 {
4088     nkf_char c0;
4089     nkf_char val;
4090
4091     if (output_bom_f) {
4092         output_bom_f = FALSE;
4093         (*o_putc)('\357');
4094         (*o_putc)('\273');
4095         (*o_putc)('\277');
4096     }
4097
4098     if (c2 == EOF) {
4099         (*o_putc)(EOF);
4100         return;
4101     }
4102
4103 #ifdef NUMCHAR_OPTION
4104     if (c2 == 0 && is_unicode_capsule(c1)){
4105         val = c1 & VALUE_MASK;
4106         if (val < 0x80){
4107             (*o_putc)(val);
4108         }else if (val < 0x800){
4109             (*o_putc)(0xC0 | (val >> 6));
4110             (*o_putc)(0x80 | (val & 0x3f));
4111         } else if (val <= NKF_INT32_C(0xFFFF)) {
4112             (*o_putc)(0xE0 | (val >> 12));
4113             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4114             (*o_putc)(0x80 | (val        & 0x3f));
4115         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4116             (*o_putc)(0xF0 | ( val>>18));
4117             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4118             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4119             (*o_putc)(0x80 | ( val      & 0x3f));
4120         }
4121         return;
4122     }
4123 #endif
4124
4125     if (c2 == 0) {
4126         output_mode = ASCII;
4127         (*o_putc)(c1);
4128     } else if (c2 == ISO_8859_1) {
4129         output_mode = UTF_8;
4130         (*o_putc)(c1 | 0x080);
4131     } else {
4132         output_mode = UTF_8;
4133         val = e2w_conv(c2, c1);
4134         if (val){
4135             w16w_conv(val, &c2, &c1, &c0);
4136             (*o_putc)(c2);
4137             if (c1){
4138                 (*o_putc)(c1);
4139                 if (c0) (*o_putc)(c0);
4140             }
4141         }
4142     }
4143 }
4144
4145 void w_oconv16(nkf_char c2, nkf_char c1)
4146 {
4147     if (output_bom_f) {
4148         output_bom_f = FALSE;
4149         if (output_endian == ENDIAN_LITTLE){
4150             (*o_putc)((unsigned char)'\377');
4151             (*o_putc)('\376');
4152         }else{
4153             (*o_putc)('\376');
4154             (*o_putc)((unsigned char)'\377');
4155         }
4156     }
4157
4158     if (c2 == EOF) {
4159         (*o_putc)(EOF);
4160         return;
4161     }
4162
4163     if (c2 == ISO_8859_1) {
4164         c2 = 0;
4165         c1 |= 0x80;
4166 #ifdef NUMCHAR_OPTION
4167     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4168         if (is_unicode_bmp(c1)) {
4169             c2 = (c1 >> 8) & 0xff;
4170             c1 &= 0xff;
4171         } else {
4172             c1 &= VALUE_MASK;
4173             if (c1 <= UNICODE_MAX) {
4174                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4175                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4176                 if (output_endian == ENDIAN_LITTLE){
4177                     (*o_putc)(c2 & 0xff);
4178                     (*o_putc)((c2 >> 8) & 0xff);
4179                     (*o_putc)(c1 & 0xff);
4180                     (*o_putc)((c1 >> 8) & 0xff);
4181                 }else{
4182                     (*o_putc)((c2 >> 8) & 0xff);
4183                     (*o_putc)(c2 & 0xff);
4184                     (*o_putc)((c1 >> 8) & 0xff);
4185                     (*o_putc)(c1 & 0xff);
4186                 }
4187             }
4188             return;
4189         }
4190 #endif
4191     } else if (c2) {
4192         nkf_char val = e2w_conv(c2, c1);
4193         c2 = (val >> 8) & 0xff;
4194         c1 = val & 0xff;
4195         if (!val) return;
4196     }
4197     if (output_endian == ENDIAN_LITTLE){
4198         (*o_putc)(c1);
4199         (*o_putc)(c2);
4200     }else{
4201         (*o_putc)(c2);
4202         (*o_putc)(c1);
4203     }
4204 }
4205
4206 void w_oconv32(nkf_char c2, nkf_char c1)