OSDN Git Service

* Fix: core dumped when invalid output encoding was given.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.161 2007/12/23 09:25:35 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-23"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
58 #if DEFAULT_NEWLINE == 0x0D0A
59 #define PUT_NEWLINE(func) do {\
60     func(0x0D);\
61     func(0x0A);\
62 } while (0)
63 #define OCONV_NEWLINE(func) do {\
64     func(0, 0x0D);\
65     func(0, 0x0A);\
66 } while (0)
67 #elif DEFAULT_NEWLINE == 0x0D
68 #define PUT_NEWLINE(func) func(0x0D)
69 #define OCONV_NEWLINE(func) func(0, 0x0D)
70 #else
71 #define DEFAULT_NEWLINE 0x0A
72 #define PUT_NEWLINE(func) func(0x0A)
73 #define OCONV_NEWLINE(func) func(0, 0x0A)
74 #endif
75 #ifdef HELP_OUTPUT_STDERR
76 #define HELP_OUTPUT stderr
77 #else
78 #define HELP_OUTPUT stdout
79 #endif
80
81 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
82 #define MSDOS
83 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
84 #define __WIN32__
85 #endif
86 #endif
87
88 #ifdef PERL_XS
89 #undef OVERWRITE
90 #endif
91
92 #ifndef PERL_XS
93 #include <stdio.h>
94 #endif
95
96 #include <stdlib.h>
97 #include <string.h>
98
99 #if defined(MSDOS) || defined(__OS2__)
100 #include <fcntl.h>
101 #include <io.h>
102 #if defined(_MSC_VER) || defined(__WATCOMC__)
103 #define mktemp _mktemp
104 #endif
105 #endif
106
107 #ifdef MSDOS
108 #ifdef LSI_C
109 #define setbinmode(fp) fsetbin(fp)
110 #elif defined(__DJGPP__)
111 #include <libc/dosio.h>
112 #define setbinmode(fp) djgpp_setbinmode(fp)
113 #else /* Microsoft C, Turbo C */
114 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
115 #endif
116 #else /* UNIX */
117 #define setbinmode(fp)
118 #endif
119
120 #if defined(__DJGPP__)
121 void  djgpp_setbinmode(FILE *fp)
122 {
123     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
124     int fd, m;
125     fd = fileno(fp);
126     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
127     __file_handle_set(fd, m);
128 }
129 #endif
130
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
133 #else /* BSD */
134 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
135 #endif
136
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
139 #define         EASYWIN
140 #ifndef __WIN16__
141 #define __WIN16__
142 #endif
143 #include <windows.h>
144 #endif
145
146 #ifdef OVERWRITE
147 /* added by satoru@isoternet.org */
148 #if defined(__EMX__)
149 #include <sys/types.h>
150 #endif
151 #include <sys/stat.h>
152 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
153 #include <unistd.h>
154 #if defined(__WATCOMC__)
155 #include <sys/utime.h>
156 #else
157 #include <utime.h>
158 #endif
159 #else /* defined(MSDOS) */
160 #ifdef __WIN32__
161 #ifdef __BORLANDC__ /* BCC32 */
162 #include <utime.h>
163 #else /* !defined(__BORLANDC__) */
164 #include <sys/utime.h>
165 #endif /* (__BORLANDC__) */
166 #else /* !defined(__WIN32__) */
167 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
168 #include <sys/utime.h>
169 #elif defined(__TURBOC__) /* BCC */
170 #include <utime.h>
171 #elif defined(LSI_C) /* LSI C */
172 #endif /* (__WIN32__) */
173 #endif
174 #endif
175 #endif
176
177 #define         FALSE   0
178 #define         TRUE    1
179
180 /* state of output_mode and input_mode
181
182    c2           0 means ASCII
183                 JIS_X_0201
184                 ISO_8859_1
185                 JIS_X_0208
186                 EOF      all termination
187    c1           32bit data
188
189  */
190
191 /* Input Assumption */
192
193 #define         JIS_INPUT       4
194 #define         EUC_INPUT      16
195 #define         SJIS_INPUT      5
196 #define         LATIN1_INPUT    6
197 #define         UTF8_INPUT     13
198 #define         UTF16_INPUT    1015
199 #define         UTF32_INPUT    1017
200
201 #define         FIXED_MIME      7
202 #define         STRICT_MIME     8
203
204 /* MIME ENCODE */
205
206
207 /* byte order */
208
209 #define         ENDIAN_BIG      1234
210 #define         ENDIAN_LITTLE   4321
211 #define         ENDIAN_2143     2143
212 #define         ENDIAN_3412     3412
213
214 /* ASCII CODE */
215
216 #define         BS      0x08
217 #define         TAB     0x09
218 #define         LF      0x0a
219 #define         CR      0x0d
220 #define         ESC     0x1b
221 #define         SP      0x20
222 #define         AT      0x40
223 #define         SSP     0xa0
224 #define         DEL     0x7f
225 #define         SI      0x0f
226 #define         SO      0x0e
227 #define         SSO     0x8e
228 #define         SS3     0x8f
229 #define         CRLF    0x0D0A
230
231
232 /* encodings */
233
/*
 * Identifiers for the encodings and character sets nkf distinguishes.
 *
 * ASCII .. UTF_32LE_BOM are numbered consecutively from 0.  The JIS_X_*
 * character-set identifiers start at 0x1000 and BINARY follows directly
 * after them.
 */
enum nkf_encodings {
    /* single-byte */
    ASCII = 0,
    ISO_8859_1,

    /* ISO-2022-JP family */
    ISO_2022_JP,
    CP50220,
    CP50221,
    CP50222,
    ISO_2022_JP_1,
    ISO_2022_JP_3,

    /* Shift_JIS family */
    SHIFT_JIS,
    WINDOWS_31J,
    CP10001,

    /* EUC-JP family */
    EUC_JP,
    CP51932,
    EUCJP_MS,
    EUCJP_ASCII,

    /* JIS X 0213 based variants */
    SHIFT_JISX0213,
    SHIFT_JIS_2004,
    EUC_JISX0213,
    EUC_JIS_2004,

    /* UTF-8 variants */
    UTF_8,
    UTF_8N,
    UTF_8_BOM,
    UTF8_MAC,

    /* UTF-16 variants */
    UTF_16,
    UTF_16BE,
    UTF_16BE_BOM,
    UTF_16LE,
    UTF_16LE_BOM,

    /* UTF-32 variants */
    UTF_32,
    UTF_32BE,
    UTF_32BE_BOM,
    UTF_32LE,
    UTF_32LE_BOM,

    /* character-set identifiers, kept apart from the encoding ids above */
    JIS_X_0201 = 0x1000,
    JIS_X_0208,
    JIS_X_0212,
    JIS_X_0213_1,
    JIS_X_0213_2,
    BINARY
};
275
276 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
277 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
278 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
279 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
280 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
281 void j_oconv(nkf_char c2, nkf_char c1);
282 void s_oconv(nkf_char c2, nkf_char c1);
283 void e_oconv(nkf_char c2, nkf_char c1);
284 void w_oconv(nkf_char c2, nkf_char c1);
285 void w_oconv16(nkf_char c2, nkf_char c1);
286 void w_oconv32(nkf_char c2, nkf_char c1);
287
288 typedef struct {
289     char *name;
290     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
291     void (*oconv_func)(nkf_char c2, nkf_char c1);
292 } nkf_native_encoding;
293
294 nkf_native_encoding NkfEncodingASCII =          { "US_ASCII", e_iconv, e_oconv };
295 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
296 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
297 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
298 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
299 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
300 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
301
302 typedef struct {
303     int id;
304     char *name;
305     nkf_native_encoding *based_encoding;
306 } nkf_encoding;
307 nkf_encoding nkf_encoding_table[] = {
308     {ASCII,             "ASCII",                &NkfEncodingASCII},
309     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
310     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingASCII},
311     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
312     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
313     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
314     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
315     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
316     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
317     {WINDOWS_31J,       "WINDOWS-31J",          &NkfEncodingShift_JIS},
318     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
319     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
320     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
321     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
322     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
323     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
324     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
325     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
326     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
327     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
328     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
329     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
330     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
331     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
332     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
333     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
334     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
335     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
336     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
337     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
338     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
339     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
340     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
341     {BINARY,            "BINARY",               &NkfEncodingASCII},
342     {-1,                NULL,                   NULL}
343 };
344 #define NKF_ENCODING_TABLE_SIZE 34
345 struct {
346     const char *name;
347     const int id;
348 } encoding_name_to_id_table[] = {
349     {"ASCII",                   ASCII},
350     {"ISO-2022-JP",             ISO_2022_JP},
351     {"X-ISO2022JP-CP932",       CP50220},
352     {"CP50220",                 CP50220},
353     {"CP50221",                 CP50221},
354     {"CP50222",                 CP50222},
355     {"ISO-2022-JP-1",           ISO_2022_JP_1},
356     {"ISO-2022-JP-3",           ISO_2022_JP_3},
357     {"SHIFT_JIS",               SHIFT_JIS},
358     {"SJIS",                    SHIFT_JIS},
359     {"WINDOWS-31J",             WINDOWS_31J},
360     {"CSWINDOWS31J",            WINDOWS_31J},
361     {"CP932",                   WINDOWS_31J},
362     {"MS932",                   WINDOWS_31J},
363     {"CP10001",                 CP10001},
364     {"EUCJP",                   EUC_JP},
365     {"EUC-JP",                  EUC_JP},
366     {"CP51932",                 CP51932},
367     {"EUC-JP-MS",               EUCJP_MS},
368     {"EUCJP-MS",                EUCJP_MS},
369     {"EUCJPMS",                 EUCJP_MS},
370     {"EUC-JP-ASCII",            EUCJP_ASCII},
371     {"EUCJP-ASCII",             EUCJP_ASCII},
372     {"SHIFT_JISX0213",          SHIFT_JISX0213},
373     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
374     {"EUC-JISX0213",            EUC_JISX0213},
375     {"EUC-JIS-2004",            EUC_JIS_2004},
376     {"UTF-8",                   UTF_8},
377     {"UTF-8N",                  UTF_8N},
378     {"UTF-8-BOM",               UTF_8_BOM},
379     {"UTF8-MAC",                UTF8_MAC},
380     {"UTF-8-MAC",               UTF8_MAC},
381     {"UTF-16",                  UTF_16},
382     {"UTF-16BE",                UTF_16BE},
383     {"UTF-16BE-BOM",            UTF_16BE_BOM},
384     {"UTF-16LE",                UTF_16LE},
385     {"UTF-16LE-BOM",            UTF_16LE_BOM},
386     {"UTF-32",                  UTF_32},
387     {"UTF-32BE",                UTF_32BE},
388     {"UTF-32BE-BOM",            UTF_32BE_BOM},
389     {"UTF-32LE",                UTF_32LE},
390     {"UTF-32LE-BOM",            UTF_32LE_BOM},
391     {"BINARY",                  BINARY},
392     {NULL,                      -1}
393 };
394 #if defined(DEFAULT_CODE_JIS)
395 #define     DEFAULT_ENCODING ISO_2022_JP
396 #elif defined(DEFAULT_CODE_SJIS)
397 #define     DEFAULT_ENCODING SHIFT_JIS
398 #elif defined(DEFAULT_CODE_EUC)
399 #define     DEFAULT_ENCODING EUC_JP
400 #elif defined(DEFAULT_CODE_UTF8)
401 #define     DEFAULT_ENCODING UTF_8
402 #endif
403
404
/*
 * Locale-independent character classification and conversion helpers.
 *
 * Every parameter is parenthesised so that an argument containing a
 * lower-precedence operator (`?:`, `|`, `&`, ...) is treated as one operand.
 * NOTE: these are still macros and evaluate their argument more than once;
 * never pass an expression with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<= (c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (viewed as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* CR, LF, or a character strictly between SP and DEL other than ? = _ ( ) . " */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
427
/* Lead-byte range limits used for the CP932 tables. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The parameter is parenthesised so compound arguments are handled as one
 * operand; it is evaluated twice, so avoid side effects in the argument.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
433
434 #define         HOLD_SIZE       1024
435 #if defined(INT_IS_SHORT)
436 #define         IOBUF_SIZE      2048
437 #else
438 #define         IOBUF_SIZE      16384
439 #endif
440
441 #define         DEFAULT_J       'B'
442 #define         DEFAULT_R       'B'
443
444 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
445 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
446
447 #define         RANGE_NUM_MAX   18
448 #define         GETA1   0x22
449 #define         GETA2   0x2e
450
451
452 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
453 #define sizeof_euc_to_utf8_1byte 94
454 #define sizeof_euc_to_utf8_2bytes 94
455 #define sizeof_utf8_to_euc_C2 64
456 #define sizeof_utf8_to_euc_E5B8 64
457 #define sizeof_utf8_to_euc_2bytes 112
458 #define sizeof_utf8_to_euc_3bytes 16
459 #endif
460
461 /* MIME preprocessor */
462
463 #ifdef EASYWIN /*Easy Win */
464 extern POINT _BufferSize;
465 #endif
466
467 struct input_code{
468     char *name;
469     nkf_char stat;
470     nkf_char score;
471     nkf_char index;
472     nkf_char buf[3];
473     void (*status_func)(struct input_code *, nkf_char);
474     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
475     int _file_stat;
476 };
477
478 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
479 static nkf_encoding *output_encoding;
480
481 #if !defined(PERL_XS) && !defined(WIN32DLL)
482 static  nkf_char     noconvert(FILE *f);
483 #endif
484 static  void    module_connection(void);
485 static  nkf_char     kanji_convert(FILE *f);
486 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
487 static  nkf_char     push_hold_buf(nkf_char c2);
488 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
489 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
490 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
491 /* UCS Mapping
492  * 0: Shift_JIS, eucJP-ascii
493  * 1: eucJP-ms
494  * 2: CP932, CP51932
495  * 3: CP10001
496  */
497 #define UCS_MAP_ASCII   0
498 #define UCS_MAP_MS      1
499 #define UCS_MAP_CP932   2
500 #define UCS_MAP_CP10001 3
501 static int ms_ucs_map_f = UCS_MAP_ASCII;
502 #endif
503 #ifdef UTF8_INPUT_ENABLE
504 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
505 static  int     no_cp932ext_f = FALSE;
506 /* ignore ZERO WIDTH NO-BREAK SPACE */
507 static  int     no_best_fit_chars_f = FALSE;
508 static  int     input_endian = ENDIAN_BIG;
509 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
510 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
511 static  void    encode_fallback_html(nkf_char c);
512 static  void    encode_fallback_xml(nkf_char c);
513 static  void    encode_fallback_java(nkf_char c);
514 static  void    encode_fallback_perl(nkf_char c);
515 static  void    encode_fallback_subchar(nkf_char c);
516 static  void    (*encode_fallback)(nkf_char c) = NULL;
517 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
518 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
519 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
520 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
521 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
522 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
523 static  void    w_status(struct input_code *, nkf_char);
524 #endif
525 #ifdef UTF8_OUTPUT_ENABLE
526 static  int     output_bom_f = FALSE;
527 static  int     output_endian = ENDIAN_BIG;
528 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
529 #endif
530 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
531 static  void    fold_conv(nkf_char c2,nkf_char c1);
532 static  void    nl_conv(nkf_char c2,nkf_char c1);
533 static  void    z_conv(nkf_char c2,nkf_char c1);
534 static  void    rot_conv(nkf_char c2,nkf_char c1);
535 static  void    hira_conv(nkf_char c2,nkf_char c1);
536 static  void    base64_conv(nkf_char c2,nkf_char c1);
537 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
538 static  void    no_connection(nkf_char c2,nkf_char c1);
539 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
540
541 static  void    code_score(struct input_code *ptr);
542 static  void    code_status(nkf_char c);
543
544 static  void    std_putc(nkf_char c);
545 static  nkf_char     std_getc(FILE *f);
546 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
547
548 static  nkf_char     broken_getc(FILE *f);
549 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
550
551 static  nkf_char     mime_begin(FILE *f);
552 static  nkf_char     mime_getc(FILE *f);
553 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
554
555 static  void    switch_mime_getc(void);
556 static  void    unswitch_mime_getc(void);
557 static  nkf_char     mime_begin_strict(FILE *f);
558 static  nkf_char     mime_getc_buf(FILE *f);
559 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
560 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
561
562 static  nkf_char     base64decode(nkf_char c);
563 static  void    mime_prechar(nkf_char c2, nkf_char c1);
564 static  void    mime_putc(nkf_char c);
565 static  void    open_mime(nkf_char c);
566 static  void    close_mime(void);
567 static  void    eof_mime(void);
568 static  void    mimeout_addchar(nkf_char c);
569 #ifndef PERL_XS
570 static  void    usage(void);
571 static  void    version(void);
572 static  void    show_configuration(void);
573 #endif
574 static  void    options(unsigned char *c);
575 static  void    reinit(void);
576
577 /* buffers */
578
579 #if !defined(PERL_XS) && !defined(WIN32DLL)
580 static unsigned char   stdibuf[IOBUF_SIZE];
581 static unsigned char   stdobuf[IOBUF_SIZE];
582 #endif
583 static unsigned char   hold_buf[HOLD_SIZE*2];
584 static int             hold_count = 0;
585
586 /* MIME preprocessor fifo */
587
588 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
589 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
590 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
591 static unsigned char           mime_buf[MIME_BUF_SIZE];
592 static unsigned int            mime_top = 0;
593 static unsigned int            mime_last = 0;  /* decoded */
594 static unsigned int            mime_input = 0; /* undecoded */
595 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
596
597 /* flags */
598 static int             unbuf_f = FALSE;
599 static int             estab_f = FALSE;
600 static int             nop_f = FALSE;
601 static int             binmode_f = TRUE;       /* binary mode */
602 static int             rot_f = FALSE;          /* rot14/43 mode */
603 static int             hira_f = FALSE;          /* hira/kata henkan */
604 static int             input_f = FALSE;        /* non fixed input code  */
605 static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
606 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
607 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
608 static int             mimebuf_f = FALSE;      /* MIME buffered input */
609 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
610 static int             iso8859_f = FALSE;      /* ISO8859 through */
611 static int             mimeout_f = FALSE;       /* base64 mode */
612 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
613 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
614
615 #ifdef UNICODE_NORMALIZATION
616 static int nfc_f = FALSE;
617 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
618 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
619 static nkf_char nfc_getc(FILE *f);
620 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
621 #endif
622
623 #ifdef INPUT_OPTION
624 static int cap_f = FALSE;
625 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
626 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
627 static nkf_char cap_getc(FILE *f);
628 static nkf_char cap_ungetc(nkf_char c,FILE *f);
629
630 static int url_f = FALSE;
631 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
632 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
633 static nkf_char url_getc(FILE *f);
634 static nkf_char url_ungetc(nkf_char c,FILE *f);
635 #endif
636
/* Builds a constant wide enough for 32-bit values even when int is 16 bits. */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)     /* top 8 bits: class tag */
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)     /* class tag marking a Unicode value */
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)     /* low 24 bits: payload */
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The parameter is parenthesised so that an argument such as `a | b` is
 * masked as a whole rather than having only its last operand masked.
 */
/* True when the class tag of c is CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
649
650 #ifdef NUMCHAR_OPTION
651 static int numchar_f = FALSE;
652 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
653 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
654 static nkf_char numchar_getc(FILE *f);
655 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
656 #endif
657
658 #ifdef CHECK_OPTION
659 static int noout_f = FALSE;
660 static void no_putc(nkf_char c);
661 static int debug_f = FALSE;
662 static void debug(const char *str);
663 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
664 #endif
665
666 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
667 #if !defined PERL_XS
668 static  void    print_guessed_code(char *filename);
669 #endif
670 static  void    set_input_codename(char *codename);
671
672 #ifdef EXEC_IO
673 static int exec_f = 0;
674 #endif
675
676 #ifdef SHIFTJIS_CP932
677 /* invert IBM extended characters to others */
678 static int cp51932_f = FALSE;
679
680 /* invert NEC-selected IBM extended characters to IBM extended characters */
681 static int cp932inv_f = TRUE;
682
683 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
684 #endif /* SHIFTJIS_CP932 */
685
686 #ifdef X0212_ENABLE
687 static int x0212_f = FALSE;
688 static nkf_char x0212_shift(nkf_char c);
689 static nkf_char x0212_unshift(nkf_char c);
690 #endif
691 static int x0213_f = FALSE;
692
693 static unsigned char prefix_table[256];
694
695 static void set_code_score(struct input_code *ptr, nkf_char score);
696 static void clr_code_score(struct input_code *ptr, nkf_char score);
697 static void status_disable(struct input_code *ptr);
698 static void status_push_ch(struct input_code *ptr, nkf_char c);
699 static void status_clear(struct input_code *ptr);
700 static void status_reset(struct input_code *ptr);
701 static void status_reinit(struct input_code *ptr);
702 static void status_check(struct input_code *ptr, nkf_char c);
703 static void e_status(struct input_code *, nkf_char);
704 static void s_status(struct input_code *, nkf_char);
705
706 struct input_code input_code_list[] = {
707     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
708     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
709 #ifdef UTF8_INPUT_ENABLE
710     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
711     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
712     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
713 #endif
714     {0}
715 };
716
717 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
718 static int              base64_count = 0;
719
720 /* X0208 -> ASCII converter */
721
722 /* fold parameter */
723 static int             f_line = 0;    /* chars in line */
724 static int             f_prev = 0;
725 static int             fold_preserve_f = FALSE; /* preserve new lines */
726 static int             fold_f  = FALSE;
727 static int             fold_len  = 0;
728
729 /* options */
730 static unsigned char   kanji_intro = DEFAULT_J;
731 static unsigned char   ascii_intro = DEFAULT_R;
732
733 /* Folding */
734
735 #define FOLD_MARGIN  10
736 #define DEFAULT_FOLD 60
737
738 static int             fold_margin  = FOLD_MARGIN;
739
740 /* converters */
741
742 #ifdef DEFAULT_CODE_JIS
743 #   define  DEFAULT_CONV j_oconv
744 #endif
745 #ifdef DEFAULT_CODE_SJIS
746 #   define  DEFAULT_CONV s_oconv
747 #endif
748 #ifdef DEFAULT_CODE_EUC
749 #   define  DEFAULT_CONV e_oconv
750 #endif
751 #ifdef DEFAULT_CODE_UTF8
752 #   define  DEFAULT_CONV w_oconv
753 #endif
754
755 /* process default */
756 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
757
758 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
759 /* s_iconv or oconv */
760 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
761
762 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
763 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
764 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
765 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
766 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
767 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
768 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
769
770 /* static redirections */
771
772 static  void   (*o_putc)(nkf_char c) = std_putc;
773
774 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
775 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
776
777 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
778 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
779
780 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
781
782 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
783 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
784
785 /* for strict mime */
786 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
787 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
788
789 /* Global states */
790 static int output_mode = ASCII,    /* output kanji mode */
791            input_mode =  ASCII,    /* input kanji mode */
792            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
793 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
794
795 /* X0201 / X0208 conversion tables */
796
/*
 * X0201 kana conversion table.
 * Original note on the covered range: 90-9F A0-DF.
 * Laid out below as two-byte pairs, four pairs per row, followed by a
 * 0x00,0x00 terminator pair (130 bytes in total).
 * NOTE(review): each pair is presumably the JIS X 0208 code of one kana --
 * confirm against the code that indexes this table.
 */
static const unsigned char cv[]= {
    0x21,0x21,  0x21,0x23,  0x21,0x56,  0x21,0x57,
    0x21,0x22,  0x21,0x26,  0x25,0x72,  0x25,0x21,
    0x25,0x23,  0x25,0x25,  0x25,0x27,  0x25,0x29,
    0x25,0x63,  0x25,0x65,  0x25,0x67,  0x25,0x43,
    0x21,0x3c,  0x25,0x22,  0x25,0x24,  0x25,0x26,
    0x25,0x28,  0x25,0x2a,  0x25,0x2b,  0x25,0x2d,
    0x25,0x2f,  0x25,0x31,  0x25,0x33,  0x25,0x35,
    0x25,0x37,  0x25,0x39,  0x25,0x3b,  0x25,0x3d,
    0x25,0x3f,  0x25,0x41,  0x25,0x44,  0x25,0x46,
    0x25,0x48,  0x25,0x4a,  0x25,0x4b,  0x25,0x4c,
    0x25,0x4d,  0x25,0x4e,  0x25,0x4f,  0x25,0x52,
    0x25,0x55,  0x25,0x58,  0x25,0x5b,  0x25,0x5e,
    0x25,0x5f,  0x25,0x60,  0x25,0x61,  0x25,0x62,
    0x25,0x64,  0x25,0x66,  0x25,0x68,  0x25,0x69,
    0x25,0x6a,  0x25,0x6b,  0x25,0x6c,  0x25,0x6d,
    0x25,0x6f,  0x25,0x73,  0x21,0x2b,  0x21,0x2c,
    0x00,0x00
};
817
818
819 /* X0201 kana conversion table for dakuten (voiced sound mark) */
820 /* 90-9F A0-DF */
/*
 * Same layout as cv[]: 64 two-byte pairs plus a final 0x00,0x00 pair.
 * Most pairs are 0x00,0x00; the populated pairs all have 0x25 as their
 * first byte.
 * NOTE(review): a 0x00,0x00 pair presumably means "no voiced form for
 * this kana" -- confirm against the code that reads dv[].
 */
821 static const unsigned char dv[]= {
822     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
827     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
828     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
829     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
830     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
831     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
832     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
833     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
834     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838     0x00,0x00};
839
840 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
841 /* 90-9F A0-DF */
/*
 * Same layout as cv[] and dv[]: 64 two-byte pairs plus a final
 * 0x00,0x00 pair.  Only five pairs are populated (0x25,0x51 through
 * 0x25,0x5d); they sit at the same positions as the 0x25,0x50 ..
 * 0x25,0x5c pairs of dv[], each with a second byte one greater.
 * NOTE(review): a 0x00,0x00 pair presumably means "no semi-voiced
 * form" -- confirm against the code that reads ev[].
 */
842 static const unsigned char ev[]= {
843     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
854     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
855     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
858     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859     0x00,0x00};
860
861
862 /* X0208 kigou (symbol) conversion table */
863 /* 0x8140 - 0x819e */
/*
 * 96 single-byte entries.  Every non-zero entry lies in the printable
 * ASCII range 0x21-0x7d.
 * NOTE(review): presumably each entry is the ASCII replacement for the
 * symbol at that offset, with 0x00 meaning "no ASCII equivalent" --
 * confirm against the code that reads fv[].
 */
864 static const unsigned char fv[] = {
865
866     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
867     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
868     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
869     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
870     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
871     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
872     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
873     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
874     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
875     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
877     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
878 } ;
879
880
881
/* When TRUE, main() reopens stdout onto a file for each input instead of
 * writing to the inherited stdout. */
882 static int             file_out_f = FALSE;
883 #ifdef OVERWRITE
/* In-place rewriting state, set by the --overwrite / --in-place long
 * options in options():
 *   overwrite_f      main() writes to a temporary file and renames it
 *                    over the input file.
 *   preserve_time_f  TRUE for --overwrite, FALSE for --in-place; when
 *                    set, main() copies the input file's times to the
 *                    output.
 *   backup_f         set by the "=SUFFIX" forms; main() renames the
 *                    input to a backup name before replacing it.
 *   backup_suffix    SUFFIX text handed to get_backup_filename(). */
884 static int             overwrite_f = FALSE;
885 static int             preserve_time_f = FALSE;
886 static int             backup_f = FALSE;
887 static char            *backup_suffix = "";
888 static char *get_backup_filename(const char *suffix, const char *filename);
889 #endif
890
891 static int nlmode_f = 0;   /* CR, LF, CRLF */
/* main() resets input_newline to 0 before each input file. */
892 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
893 static nkf_char prev_cr = 0; /* CR or 0 */
894 #ifdef EASYWIN /*Easy Win */
/* Receives the integer that main() scanf()s at exit when output was not
 * sent to a file. */
895 static int             end_check;
896 #endif /*Easy Win */
897
/* NOTE(review): presumably the push-back buffer and its fill index used
 * by std_getc/std_ungetc -- confirm at their definitions.  Unlike the
 * variables above, these two are not declared static. */
898 #define STD_GC_BUFSIZE (256)
899 nkf_char std_gc_buf[STD_GC_BUFSIZE];
900 nkf_char std_gc_ndx;
901
/*
 * Return a freshly malloc()ed duplicate of the NUL-terminated string str.
 *
 * On allocation failure the error is reported with perror(str) and a
 * pointer to the string literal "" is returned instead; that fallback
 * value must be neither modified nor passed to free().
 */
char* nkf_strcpy(const char *str)
{
    size_t nbytes = strlen(str) + 1;    /* payload plus terminator */
    char *copy = malloc(nbytes);

    if (copy != NULL) {
        memcpy(copy, str, nbytes);
        return copy;
    }

    perror(str);
    return "";
}
912
/*
 * Copy src into dest, converting each character with nkf_toupper().
 *
 *   src     NUL-terminated input string.
 *   dest    output buffer.
 *   length  size of dest in bytes, including room for the terminator
 *           (options() passes a 32-byte buffer together with 32).
 *
 * At most length - 1 characters are copied and dest is always
 * NUL-terminated when length > 0; longer input is truncated.  The
 * terminator is therefore never written at dest[length], one byte past
 * the end of the buffer.  When length is 0 nothing is written.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    size_t i = 0;

    if (length == 0) {
        return;
    }
    /* Stop one short of length so the terminator always fits. */
    for (; i + 1 < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
921
922 static nkf_encoding *nkf_enc_from_index(int idx)
923 {
924     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
925         return 0;
926     }
927     return &nkf_encoding_table[idx];
928 }
929
930 static int nkf_enc_find_index(const char *name)
931 {
932     int i, index = -1;
933     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
934         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
935             return encoding_name_to_id_table[i].id;
936         }
937     }
938     return index;
939 }
940
941 static nkf_encoding *nkf_enc_find(const char *name)
942 {
943     int idx = -1;
944     idx = nkf_enc_find_index(name);
945     if (idx < 0) return 0;
946     return nkf_enc_from_index(idx);
947 }
948
/* Field accessors for a pointer to an nkf_encoding entry. */
#define nkf_enc_name(enc) ((enc)->name)
/*
 * Id of the encoding, or -1 when enc is a null pointer.  nkf_enc_find()
 * returns null for an unknown name, and its result is fed straight into
 * `switch (nkf_enc_to_index(enc))`; yielding -1 sends that switch to its
 * default branch instead of dereferencing null.  -1 is also what
 * nkf_enc_find_index() uses for "not found".  Note that enc is evaluated
 * twice, so the argument must be free of side effects.
 */
#define nkf_enc_to_index(enc) ((enc) ? (enc)->id : -1)
#define nkf_enc_to_base_encoding(enc) ((enc)->based_encoding)
952
953 #ifdef WIN32DLL
954 #include "nkf32dll.c"
955 #elif defined(PERL_XS)
956 #else /* WIN32DLL */
957 int main(int argc, char **argv)
958 {
959     FILE  *fin;
960     unsigned char  *cp;
961
962     char *outfname = NULL;
963     char *origfname;
964
965 #ifdef EASYWIN /*Easy Win */
966     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
967 #endif
968
969     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
970         cp = (unsigned char *)*argv;
971         options(cp);
972         if (guess_f) {
973 #ifdef CHECK_OPTION
974             int debug_f_back = debug_f;
975 #endif
976 #ifdef EXEC_IO
977             int exec_f_back = exec_f;
978 #endif
979 #ifdef X0212_ENABLE
980             int x0212_f_back = x0212_f;
981 #endif
982             int x0213_f_back = x0213_f;
983             int guess_f_back = guess_f;
984             reinit();
985             guess_f = guess_f_back;
986             mime_f = FALSE;
987 #ifdef CHECK_OPTION
988             debug_f = debug_f_back;
989 #endif
990 #ifdef EXEC_IO
991             exec_f = exec_f_back;
992 #endif
993 #ifdef X0212_ENABLE
994             x0212_f = x0212_f_back;
995 #endif
996             x0213_f = x0213_f_back;
997         }
998 #ifdef EXEC_IO
999         if (exec_f){
1000             int fds[2], pid;
1001             if (pipe(fds) < 0 || (pid = fork()) < 0){
1002                 abort();
1003             }
1004             if (pid == 0){
1005                 if (exec_f > 0){
1006                     close(fds[0]);
1007                     dup2(fds[1], 1);
1008                 }else{
1009                     close(fds[1]);
1010                     dup2(fds[0], 0);
1011                 }
1012                 execvp(argv[1], &argv[1]);
1013             }
1014             if (exec_f > 0){
1015                 close(fds[1]);
1016                 dup2(fds[0], 0);
1017             }else{
1018                 close(fds[0]);
1019                 dup2(fds[1], 1);
1020             }
1021             argc = 0;
1022             break;
1023         }
1024 #endif
1025     }
1026
1027     if (binmode_f == TRUE)
1028 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1029     if (freopen("","wb",stdout) == NULL)
1030         return (-1);
1031 #else
1032     setbinmode(stdout);
1033 #endif
1034
1035     if (unbuf_f)
1036       setbuf(stdout, (char *) NULL);
1037     else
1038       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1039
1040     if (argc == 0) {
1041       if (binmode_f == TRUE)
1042 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1043       if (freopen("","rb",stdin) == NULL) return (-1);
1044 #else
1045       setbinmode(stdin);
1046 #endif
1047       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1048       if (nop_f)
1049           noconvert(stdin);
1050       else {
1051           kanji_convert(stdin);
1052           if (guess_f) print_guessed_code(NULL);
1053       }
1054     } else {
1055       int nfiles = argc;
1056         int is_argument_error = FALSE;
1057       while (argc--) {
1058             input_codename = NULL;
1059             input_newline = 0;
1060 #ifdef CHECK_OPTION
1061             iconv_for_check = 0;
1062 #endif
1063           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1064                 perror(*(argv-1));
1065                 is_argument_error = TRUE;
1066                 continue;
1067           } else {
1068 #ifdef OVERWRITE
1069               int fd = 0;
1070               int fd_backup = 0;
1071 #endif
1072
1073 /* reopen file for stdout */
1074               if (file_out_f == TRUE) {
1075 #ifdef OVERWRITE
1076                   if (overwrite_f){
1077                       outfname = malloc(strlen(origfname)
1078                                         + strlen(".nkftmpXXXXXX")
1079                                         + 1);
1080                       if (!outfname){
1081                           perror(origfname);
1082                           return -1;
1083                       }
1084                       strcpy(outfname, origfname);
1085 #ifdef MSDOS
1086                       {
1087                           int i;
1088                           for (i = strlen(outfname); i; --i){
1089                               if (outfname[i - 1] == '/'
1090                                   || outfname[i - 1] == '\\'){
1091                                   break;
1092                               }
1093                           }
1094                           outfname[i] = '\0';
1095                       }
1096                       strcat(outfname, "ntXXXXXX");
1097                       mktemp(outfname);
1098                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1099                                 S_IREAD | S_IWRITE);
1100 #else
1101                       strcat(outfname, ".nkftmpXXXXXX");
1102                       fd = mkstemp(outfname);
1103 #endif
1104                       if (fd < 0
1105                           || (fd_backup = dup(fileno(stdout))) < 0
1106                           || dup2(fd, fileno(stdout)) < 0
1107                           ){
1108                           perror(origfname);
1109                           return -1;
1110                       }
1111                   }else
1112 #endif
1113                   if(argc == 1) {
1114                       outfname = *argv++;
1115                       argc--;
1116                   } else {
1117                       outfname = "nkf.out";
1118                   }
1119
1120                   if(freopen(outfname, "w", stdout) == NULL) {
1121                       perror (outfname);
1122                       return (-1);
1123                   }
1124                   if (binmode_f == TRUE) {
1125 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1126                       if (freopen("","wb",stdout) == NULL)
1127                            return (-1);
1128 #else
1129                       setbinmode(stdout);
1130 #endif
1131                   }
1132               }
1133               if (binmode_f == TRUE)
1134 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1135                  if (freopen("","rb",fin) == NULL)
1136                     return (-1);
1137 #else
1138                  setbinmode(fin);
1139 #endif
1140               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1141               if (nop_f)
1142                   noconvert(fin);
1143               else {
1144                   char *filename = NULL;
1145                   kanji_convert(fin);
1146                   if (nfiles > 1) filename = origfname;
1147                   if (guess_f) print_guessed_code(filename);
1148               }
1149               fclose(fin);
1150 #ifdef OVERWRITE
1151               if (overwrite_f) {
1152                   struct stat     sb;
1153 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1154                   time_t tb[2];
1155 #else
1156                   struct utimbuf  tb;
1157 #endif
1158
1159                   fflush(stdout);
1160                   close(fd);
1161                   if (dup2(fd_backup, fileno(stdout)) < 0){
1162                       perror("dup2");
1163                   }
1164                   if (stat(origfname, &sb)) {
1165                       fprintf(stderr, "Can't stat %s\n", origfname);
1166                   }
1167                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1168                   if (chmod(outfname, sb.st_mode)) {
1169                       fprintf(stderr, "Can't set permission %s\n", outfname);
1170                   }
1171
1172                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1173                     if(preserve_time_f){
1174 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1175                         tb[0] = tb[1] = sb.st_mtime;
1176                         if (utime(outfname, tb)) {
1177                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1178                         }
1179 #else
1180                         tb.actime  = sb.st_atime;
1181                         tb.modtime = sb.st_mtime;
1182                         if (utime(outfname, &tb)) {
1183                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1184                         }
1185 #endif
1186                     }
1187                     if(backup_f){
1188                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1189 #ifdef MSDOS
1190                         unlink(backup_filename);
1191 #endif
1192                         if (rename(origfname, backup_filename)) {
1193                             perror(backup_filename);
1194                             fprintf(stderr, "Can't rename %s to %s\n",
1195                                     origfname, backup_filename);
1196                         }
1197                     }else{
1198 #ifdef MSDOS
1199                         if (unlink(origfname)){
1200                             perror(origfname);
1201                         }
1202 #endif
1203                     }
1204                   if (rename(outfname, origfname)) {
1205                       perror(origfname);
1206                       fprintf(stderr, "Can't rename %s to %s\n",
1207                               outfname, origfname);
1208                   }
1209                   free(outfname);
1210               }
1211 #endif
1212           }
1213       }
1214         if (is_argument_error)
1215             return(-1);
1216     }
1217 #ifdef EASYWIN /*Easy Win */
1218     if (file_out_f == FALSE)
1219         scanf("%d",&end_check);
1220     else
1221         fclose(stdout);
1222 #else /* for Other OS */
1223     if (file_out_f == TRUE)
1224         fclose(stdout);
1225 #endif /*Easy Win */
1226     return (0);
1227 }
1228 #endif /* WIN32DLL */
1229
1230 #ifdef OVERWRITE
/*
 * Build the backup file name for filename from suffix.
 *
 * If suffix contains one or more '*' characters it is used as a
 * template: every '*' is replaced by filename and all other characters
 * are copied as they are ("*.bak" + "a.txt" -> "a.txt.bak").
 * Otherwise suffix is simply appended to filename ("~" + "a.txt" ->
 * "a.txt~").
 *
 * Returns a malloc()ed string that the caller must free(), or NULL
 * (after reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t size;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each one-byte '*' is swapped for a whole copy of filename. */
        size = (suffix_length - asterisk_count)
               + asterisk_count * filename_length + 1;
    } else {
        size = filename_length + suffix_length + 1;
    }

    backup_filename = malloc(size);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminator */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1269 #endif
1270
/*
 * Long ("--name") options recognised by options().
 *
 *   name   option text after the leading "--".  A name ending in '='
 *          takes a value: options() stops comparing at the '=' and
 *          treats what follows it as the argument.
 *   alias  short-option letters that options() processes in place of
 *          the long option.  An empty alias means the option is handled
 *          by its own strcmp() branch inside options().
 *
 * options() scans the table from the top and uses the first entry that
 * matches.
 */
1271 static const struct {
1272     const char *name;
1273     const char *alias;
1274 } long_option[] = {
1275     {"ic=", ""},
1276     {"oc=", ""},
1277     {"base64","jMB"},
1278     {"euc","e"},
1279     {"euc-input","E"},
1280     {"fj","jm"},
1281     {"help","v"},
1282     {"jis","j"},
1283     {"jis-input","J"},
1284     {"mac","sLm"},
1285     {"mime","jM"},
1286     {"mime-input","m"},
1287     {"msdos","sLw"},
1288     {"sjis","s"},
1289     {"sjis-input","S"},
1290     {"unix","eLu"},
1291     {"version","V"},
1292     {"windows","sLw"},
1293     {"hiragana","h1"},
1294     {"katakana","h2"},
1295     {"katakana-hiragana","h3"},
1296     {"guess=", ""},
1297     {"guess", "g1"},
1298     {"cp932", ""},
1299     {"no-cp932", ""},
1300 #ifdef X0212_ENABLE
1301     {"x0212", ""},
1302 #endif
1303 #ifdef UTF8_OUTPUT_ENABLE
1304     {"utf8", "w"},
1305     {"utf16", "w16"},
1306     {"ms-ucs-map", ""},
1307     {"fb-skip", ""},
1308     {"fb-html", ""},
1309     {"fb-xml", ""},
1310     {"fb-perl", ""},
1311     {"fb-java", ""},
1312     {"fb-subchar", ""},
1313     {"fb-subchar=", ""},
1314 #endif
1315 #ifdef UTF8_INPUT_ENABLE
1316     {"utf8-input", "W"},
1317     {"utf16-input", "W16"},
1318     {"no-cp932ext", ""},
1319     {"no-best-fit-chars",""},
1320 #endif
1321 #ifdef UNICODE_NORMALIZATION
1322     {"utf8mac-input", ""},
1323 #endif
1324 #ifdef OVERWRITE
1325     {"overwrite", ""},
1326     {"overwrite=", ""},
1327     {"in-place", ""},
1328     {"in-place=", ""},
1329 #endif
1330 #ifdef INPUT_OPTION
1331     {"cap-input", ""},
1332     {"url-input", ""},
1333 #endif
1334 #ifdef NUMCHAR_OPTION
1335     {"numchar-input", ""},
1336 #endif
1337 #ifdef CHECK_OPTION
1338     {"no-output", ""},
1339     {"debug", ""},
1340 #endif
1341 #ifdef SHIFTJIS_CP932
1342     {"cp932inv", ""},
1343 #endif
1344 #ifdef EXEC_IO
1345     {"exec-in", ""},
1346     {"exec-out", ""},
1347 #endif
1348     {"prefix=", ""},
1349 };
1350
/* Set to 1 by options() when it meets a bare "--"; from then on
 * options() returns immediately without parsing its argument. */
1351 static int option_mode = 0;
1352
1353 void options(unsigned char *cp)
1354 {
1355     nkf_char i, j;
1356     unsigned char *p;
1357     unsigned char *cp_back = NULL;
1358     char codeset[32];
1359     nkf_encoding *enc;
1360
1361     if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
1362     if (option_mode==1)
1363         return;
1364     while(*cp && *cp++!='-');
1365     while (*cp || cp_back) {
1366         if(!*cp){
1367             cp = cp_back;
1368             cp_back = NULL;
1369             continue;
1370         }
1371         p = 0;
1372         switch (*cp++) {
1373         case '-':  /* literal options */
1374             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1375                 option_mode = 1;
1376                 return;
1377             }
1378             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1379                 p = (unsigned char *)long_option[i].name;
1380                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1381                 if (*p == cp[j] || cp[j] == SP){
1382                     p = &cp[j] + 1;
1383                     break;
1384                 }
1385                 p = 0;
1386             }
1387             if (p == 0) {
1388                 fprintf(stderr, "unknown long option: --%s\n", cp);
1389                 return;
1390             }
1391             while(*cp && *cp != SP && cp++);
1392             if (long_option[i].alias[0]){
1393                 cp_back = cp;
1394                 cp = (unsigned char *)long_option[i].alias;
1395             }else{
1396                 if (strcmp(long_option[i].name, "ic=") == 0){
1397                     nkf_str_upcase((char *)p, codeset, 32);
1398                     enc = nkf_enc_find(codeset);
1399                     switch (nkf_enc_to_index(enc)) {
1400                     case ISO_2022_JP:
1401                         input_f = JIS_INPUT;
1402                         break;
1403                     case CP50220:
1404                     case CP50221:
1405                     case CP50222:
1406                         input_f = JIS_INPUT;
1407 #ifdef SHIFTJIS_CP932
1408                         cp51932_f = TRUE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1411                         ms_ucs_map_f = UCS_MAP_CP932;
1412 #endif
1413                         break;
1414                     case ISO_2022_JP_1:
1415                         input_f = JIS_INPUT;
1416 #ifdef X0212_ENABLE
1417                         x0212_f = TRUE;
1418 #endif
1419                         break;
1420                     case ISO_2022_JP_3:
1421                         input_f = JIS_INPUT;
1422 #ifdef X0212_ENABLE
1423                         x0212_f = TRUE;
1424 #endif
1425                         x0213_f = TRUE;
1426                         break;
1427                     case SHIFT_JIS:
1428                         input_f = SJIS_INPUT;
1429                         break;
1430                     case WINDOWS_31J:
1431                         input_f = SJIS_INPUT;
1432 #ifdef SHIFTJIS_CP932
1433                         cp51932_f = TRUE;
1434 #endif
1435 #ifdef UTF8_OUTPUT_ENABLE
1436                         ms_ucs_map_f = UCS_MAP_CP932;
1437 #endif
1438                         break;
1439                     case CP10001:
1440                         input_f = SJIS_INPUT;
1441 #ifdef SHIFTJIS_CP932
1442                         cp51932_f = TRUE;
1443 #endif
1444 #ifdef UTF8_OUTPUT_ENABLE
1445                         ms_ucs_map_f = UCS_MAP_CP10001;
1446 #endif
1447                         break;
1448                     case EUC_JP:
1449                         input_f = EUC_INPUT;
1450                         break;
1451                     case CP51932:
1452                         input_f = EUC_INPUT;
1453 #ifdef SHIFTJIS_CP932
1454                         cp51932_f = TRUE;
1455 #endif
1456 #ifdef UTF8_OUTPUT_ENABLE
1457                         ms_ucs_map_f = UCS_MAP_CP932;
1458 #endif
1459                         break;
1460                     case EUCJP_MS:
1461                         input_f = EUC_INPUT;
1462 #ifdef SHIFTJIS_CP932
1463                         cp51932_f = FALSE;
1464 #endif
1465 #ifdef UTF8_OUTPUT_ENABLE
1466                         ms_ucs_map_f = UCS_MAP_MS;
1467 #endif
1468                         break;
1469                     case EUCJP_ASCII:
1470                         input_f = EUC_INPUT;
1471 #ifdef SHIFTJIS_CP932
1472                         cp51932_f = FALSE;
1473 #endif
1474 #ifdef UTF8_OUTPUT_ENABLE
1475                         ms_ucs_map_f = UCS_MAP_ASCII;
1476 #endif
1477                         break;
1478                     case SHIFT_JISX0213:
1479                     case SHIFT_JIS_2004:
1480                         input_f = SJIS_INPUT;
1481                         x0213_f = TRUE;
1482 #ifdef SHIFTJIS_CP932
1483                         cp51932_f = FALSE;
1484 #endif
1485                         break;
1486                     case EUC_JISX0213:
1487                     case EUC_JIS_2004:
1488                         input_f = EUC_INPUT;
1489                         x0213_f = TRUE;
1490 #ifdef SHIFTJIS_CP932
1491                         cp51932_f = FALSE;
1492 #endif
1493                         break;
1494 #ifdef UTF8_INPUT_ENABLE
1495                     case UTF_8:
1496                     case UTF_8N:
1497                     case UTF_8_BOM:
1498                         input_f = UTF8_INPUT;
1499                         break;
1500 #ifdef UNICODE_NORMALIZATION
1501                     case UTF8_MAC:
1502                         input_f = UTF8_INPUT;
1503                         nfc_f = TRUE;
1504                         break;
1505 #endif
1506                     case UTF_16:
1507                     case UTF_16BE:
1508                     case UTF_16BE_BOM:
1509                         input_f = UTF16_INPUT;
1510                         input_endian = ENDIAN_BIG;
1511                         break;
1512                     case UTF_16LE:
1513                     case UTF_16LE_BOM:
1514                         input_f = UTF16_INPUT;
1515                         input_endian = ENDIAN_LITTLE;
1516                         break;
1517                     case UTF_32:
1518                     case UTF_32BE:
1519                     case UTF_32BE_BOM:
1520                         input_f = UTF32_INPUT;
1521                         input_endian = ENDIAN_BIG;
1522                         break;
1523                     case UTF_32LE:
1524                     case UTF_32LE_BOM:
1525                         input_f = UTF32_INPUT;
1526                         input_endian = ENDIAN_LITTLE;
1527                         break;
1528 #endif
1529                     default:
1530                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1531                         break;
1532                     }
1533                     continue;
1534                 }
1535                 if (strcmp(long_option[i].name, "oc=") == 0){
1536                     x0201_f = FALSE;
1537                     nkf_str_upcase((char *)p, codeset, 32);
1538                     enc = nkf_enc_find(codeset);
1539                     if (enc <= 0) continue;
1540                     output_encoding = enc;
1541                     switch (nkf_enc_to_index(output_encoding)) {
1542                     case ISO_2022_JP:
1543                         output_conv = j_oconv;
1544                         break;
1545                     case CP50220:
1546                             output_conv = j_oconv;
1547                             x0201_f = TRUE;
1548 #ifdef SHIFTJIS_CP932
1549                             cp932inv_f = FALSE;
1550 #endif
1551 #ifdef UTF8_OUTPUT_ENABLE
1552                             ms_ucs_map_f = UCS_MAP_CP932;
1553 #endif
1554                         break;
1555                     case CP50221:
1556                         output_conv = j_oconv;
1557 #ifdef SHIFTJIS_CP932
1558                         cp932inv_f = FALSE;
1559 #endif
1560 #ifdef UTF8_OUTPUT_ENABLE
1561                         ms_ucs_map_f = UCS_MAP_CP932;
1562 #endif
1563                         break;
1564                     case ISO_2022_JP_1:
1565                         output_conv = j_oconv;
1566 #ifdef X0212_ENABLE
1567                         x0212_f = TRUE;
1568 #endif
1569 #ifdef SHIFTJIS_CP932
1570                         cp932inv_f = FALSE;
1571 #endif
1572                         break;
1573                     case ISO_2022_JP_3:
1574                         output_conv = j_oconv;
1575 #ifdef X0212_ENABLE
1576                         x0212_f = TRUE;
1577 #endif
1578                         x0213_f = TRUE;
1579 #ifdef SHIFTJIS_CP932
1580                         cp932inv_f = FALSE;
1581 #endif
1582                         break;
1583                     case SHIFT_JIS:
1584                         output_conv = s_oconv;
1585                         break;
1586                     case WINDOWS_31J:
1587                         output_conv = s_oconv;
1588 #ifdef UTF8_OUTPUT_ENABLE
1589                         ms_ucs_map_f = UCS_MAP_CP932;
1590 #endif
1591                         break;
1592                     case CP10001:
1593                         output_conv = s_oconv;
1594 #ifdef UTF8_OUTPUT_ENABLE
1595                         ms_ucs_map_f = UCS_MAP_CP10001;
1596 #endif
1597                         break;
1598                     case EUC_JP:
1599                         output_conv = e_oconv;
1600                         break;
1601                     case CP51932:
1602                         output_conv = e_oconv;
1603 #ifdef SHIFTJIS_CP932
1604                         cp932inv_f = FALSE;
1605 #endif
1606 #ifdef UTF8_OUTPUT_ENABLE
1607                         ms_ucs_map_f = UCS_MAP_CP932;
1608 #endif
1609                         break;
1610                     case EUCJP_MS:
1611                         output_conv = e_oconv;
1612 #ifdef X0212_ENABLE
1613                         x0212_f = TRUE;
1614 #endif
1615 #ifdef UTF8_OUTPUT_ENABLE
1616                         ms_ucs_map_f = UCS_MAP_MS;
1617 #endif
1618                         break;
1619                     case EUCJP_ASCII:
1620                         output_conv = e_oconv;
1621 #ifdef X0212_ENABLE
1622                         x0212_f = TRUE;
1623 #endif
1624 #ifdef UTF8_OUTPUT_ENABLE
1625                         ms_ucs_map_f = UCS_MAP_ASCII;
1626 #endif
1627                         break;
1628                     case SHIFT_JISX0213:
1629                     case SHIFT_JIS_2004:
1630                             output_conv = s_oconv;
1631                             x0213_f = TRUE;
1632 #ifdef SHIFTJIS_CP932
1633                             cp932inv_f = FALSE;
1634 #endif
1635                         break;
1636                     case EUC_JISX0213:
1637                     case EUC_JIS_2004:
1638                         output_conv = e_oconv;
1639 #ifdef X0212_ENABLE
1640                         x0212_f = TRUE;
1641 #endif
1642                         x0213_f = TRUE;
1643 #ifdef SHIFTJIS_CP932
1644                         cp932inv_f = FALSE;
1645 #endif
1646                         break;
1647 #ifdef UTF8_OUTPUT_ENABLE
1648                     case UTF_8:
1649                     case UTF_8N:
1650                         output_conv = w_oconv;
1651                         break;
1652                     case UTF_8_BOM:
1653                         output_conv = w_oconv;
1654                         output_bom_f = TRUE;
1655                         break;
1656                     case UTF_16BE:
1657                         output_conv = w_oconv16;
1658                         break;
1659                     case UTF_16:
1660                     case UTF_16BE_BOM:
1661                         output_conv = w_oconv16;
1662                         output_bom_f = TRUE;
1663                         break;
1664                     case UTF_16LE:
1665                         output_conv = w_oconv16;
1666                         output_endian = ENDIAN_LITTLE;
1667                         break;
1668                     case UTF_16LE_BOM:
1669                         output_conv = w_oconv16;
1670                         output_endian = ENDIAN_LITTLE;
1671                         output_bom_f = TRUE;
1672                         break;
1673                     case UTF_32:
1674                     case UTF_32BE:
1675                         output_conv = w_oconv32;
1676                         break;
1677                     case UTF_32BE_BOM:
1678                         output_conv = w_oconv32;
1679                         output_bom_f = TRUE;
1680                         break;
1681                     case UTF_32LE:
1682                         output_conv = w_oconv32;
1683                         output_endian = ENDIAN_LITTLE;
1684                         break;
1685                     case UTF_32LE_BOM:
1686                         output_conv = w_oconv32;
1687                         output_endian = ENDIAN_LITTLE;
1688                         output_bom_f = TRUE;
1689                         break;
1690 #endif
1691                     default:
1692                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1693                         break;
1694                     }
1695                     continue;
1696                 }
1697                 if (strcmp(long_option[i].name, "guess=") == 0){
1698                     if (p[0] == '1') {
1699                         guess_f = 2;
1700                     } else {
1701                         guess_f = 1;
1702                     }
1703                     continue;
1704                 }
1705 #ifdef OVERWRITE
1706                 if (strcmp(long_option[i].name, "overwrite") == 0){
1707                     file_out_f = TRUE;
1708                     overwrite_f = TRUE;
1709                     preserve_time_f = TRUE;
1710                     continue;
1711                 }
1712                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1713                     file_out_f = TRUE;
1714                     overwrite_f = TRUE;
1715                     preserve_time_f = TRUE;
1716                     backup_f = TRUE;
1717                     backup_suffix = malloc(strlen((char *) p) + 1);
1718                     strcpy(backup_suffix, (char *) p);
1719                     continue;
1720                 }
1721                 if (strcmp(long_option[i].name, "in-place") == 0){
1722                     file_out_f = TRUE;
1723                     overwrite_f = TRUE;
1724                     preserve_time_f = FALSE;
1725                     continue;
1726                 }
1727                 if (strcmp(long_option[i].name, "in-place=") == 0){
1728                     file_out_f = TRUE;
1729                     overwrite_f = TRUE;
1730                     preserve_time_f = FALSE;
1731                     backup_f = TRUE;
1732                     backup_suffix = malloc(strlen((char *) p) + 1);
1733                     strcpy(backup_suffix, (char *) p);
1734                     continue;
1735                 }
1736 #endif
1737 #ifdef INPUT_OPTION
1738                 if (strcmp(long_option[i].name, "cap-input") == 0){
1739                     cap_f = TRUE;
1740                     continue;
1741                 }
1742                 if (strcmp(long_option[i].name, "url-input") == 0){
1743                     url_f = TRUE;
1744                     continue;
1745                 }
1746 #endif
1747 #ifdef NUMCHAR_OPTION
1748                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1749                     numchar_f = TRUE;
1750                     continue;
1751                 }
1752 #endif
1753 #ifdef CHECK_OPTION
1754                 if (strcmp(long_option[i].name, "no-output") == 0){
1755                     noout_f = TRUE;
1756                     continue;
1757                 }
1758                 if (strcmp(long_option[i].name, "debug") == 0){
1759                     debug_f = TRUE;
1760                     continue;
1761                 }
1762 #endif
1763                 if (strcmp(long_option[i].name, "cp932") == 0){
1764 #ifdef SHIFTJIS_CP932
1765                     cp51932_f = TRUE;
1766                     cp932inv_f = TRUE;
1767 #endif
1768 #ifdef UTF8_OUTPUT_ENABLE
1769                     ms_ucs_map_f = UCS_MAP_CP932;
1770 #endif
1771                     continue;
1772                 }
1773                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1774 #ifdef SHIFTJIS_CP932
1775                     cp51932_f = FALSE;
1776                     cp932inv_f = FALSE;
1777 #endif
1778 #ifdef UTF8_OUTPUT_ENABLE
1779                     ms_ucs_map_f = UCS_MAP_ASCII;
1780 #endif
1781                     continue;
1782                 }
1783 #ifdef SHIFTJIS_CP932
1784                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1785                     cp932inv_f = TRUE;
1786                     continue;
1787                 }
1788 #endif
1789
1790 #ifdef X0212_ENABLE
1791                 if (strcmp(long_option[i].name, "x0212") == 0){
1792                     x0212_f = TRUE;
1793                     continue;
1794                 }
1795 #endif
1796
1797 #ifdef EXEC_IO
1798                   if (strcmp(long_option[i].name, "exec-in") == 0){
1799                       exec_f = 1;
1800                       return;
1801                   }
1802                   if (strcmp(long_option[i].name, "exec-out") == 0){
1803                       exec_f = -1;
1804                       return;
1805                   }
1806 #endif
1807 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1808                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1809                     no_cp932ext_f = TRUE;
1810                     continue;
1811                 }
1812                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1813                     no_best_fit_chars_f = TRUE;
1814                     continue;
1815                 }
1816                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1817                     encode_fallback = NULL;
1818                     continue;
1819                 }
1820                 if (strcmp(long_option[i].name, "fb-html") == 0){
1821                     encode_fallback = encode_fallback_html;
1822                     continue;
1823                 }
1824                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1825                     encode_fallback = encode_fallback_xml;
1826                     continue;
1827                 }
1828                 if (strcmp(long_option[i].name, "fb-java") == 0){
1829                     encode_fallback = encode_fallback_java;
1830                     continue;
1831                 }
1832                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1833                     encode_fallback = encode_fallback_perl;
1834                     continue;
1835                 }
1836                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1837                     encode_fallback = encode_fallback_subchar;
1838                     continue;
1839                 }
1840                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1841                     encode_fallback = encode_fallback_subchar;
1842                     unicode_subchar = 0;
1843                     if (p[0] != '0'){
1844                         /* decimal number */
1845                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1846                             unicode_subchar *= 10;
1847                             unicode_subchar += hex2bin(p[i]);
1848                         }
1849                     }else if(p[1] == 'x' || p[1] == 'X'){
1850                         /* hexadecimal number */
1851                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1852                             unicode_subchar <<= 4;
1853                             unicode_subchar |= hex2bin(p[i]);
1854                         }
1855                     }else{
1856                         /* octal number */
1857                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1858                             unicode_subchar *= 8;
1859                             unicode_subchar += hex2bin(p[i]);
1860                         }
1861                     }
1862                     w16e_conv(unicode_subchar, &i, &j);
1863                     unicode_subchar = i<<8 | j;
1864                     continue;
1865                 }
1866 #endif
1867 #ifdef UTF8_OUTPUT_ENABLE
1868                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1869                     ms_ucs_map_f = UCS_MAP_MS;
1870                     continue;
1871                 }
1872 #endif
1873 #ifdef UNICODE_NORMALIZATION
1874                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1875                     input_f = UTF8_INPUT;
1876                     nfc_f = TRUE;
1877                     continue;
1878                 }
1879 #endif
1880                 if (strcmp(long_option[i].name, "prefix=") == 0){
1881                     if (nkf_isgraph(p[0])){
1882                         for (i = 1; nkf_isgraph(p[i]); i++){
1883                             prefix_table[p[i]] = p[0];
1884                         }
1885                     }
1886                     continue;
1887                 }
1888             }
1889             continue;
1890         case 'b':           /* buffered mode */
1891             unbuf_f = FALSE;
1892             continue;
1893         case 'u':           /* non bufferd mode */
1894             unbuf_f = TRUE;
1895             continue;
1896         case 't':           /* transparent mode */
1897             if (*cp=='1') {
1898                 /* alias of -t */
1899                 cp++;
1900                 nop_f = TRUE;
1901             } else if (*cp=='2') {
1902                 /*
1903                  * -t with put/get
1904                  *
1905                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1906                  *
1907                  */
1908                 cp++;
1909                 nop_f = 2;
1910             } else
1911                 nop_f = TRUE;
1912             continue;
1913         case 'j':           /* JIS output */
1914         case 'n':
1915             output_conv = j_oconv;
1916             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1917             continue;
1918         case 'e':           /* AT&T EUC output */
1919             output_conv = e_oconv;
1920             cp932inv_f = FALSE;
1921             output_encoding = nkf_enc_from_index(EUC_JP);
1922             continue;
1923         case 's':           /* SJIS output */
1924             output_conv = s_oconv;
1925             output_encoding = nkf_enc_from_index(SHIFT_JIS);
1926             continue;
1927         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1928             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1929             input_f = LATIN1_INPUT;
1930             continue;
1931         case 'i':           /* Kanji IN ESC-$-@/B */
1932             if (*cp=='@'||*cp=='B')
1933                 kanji_intro = *cp++;
1934             continue;
1935         case 'o':           /* ASCII IN ESC-(-J/B */
1936             if (*cp=='J'||*cp=='B'||*cp=='H')
1937                 ascii_intro = *cp++;
1938             continue;
1939         case 'h':
1940             /*
1941                 bit:1   katakana->hiragana
1942                 bit:2   hiragana->katakana
1943             */
1944             if ('9'>= *cp && *cp>='0')
1945                 hira_f |= (*cp++ -'0');
1946             else
1947                 hira_f |= 1;
1948             continue;
1949         case 'r':
1950             rot_f = TRUE;
1951             continue;
1952 #if defined(MSDOS) || defined(__OS2__)
1953         case 'T':
1954             binmode_f = FALSE;
1955             continue;
1956 #endif
1957 #ifndef PERL_XS
1958         case 'V':
1959             show_configuration();
1960             exit(1);
1961             break;
1962         case 'v':
1963             usage();
1964             exit(1);
1965             break;
1966 #endif
1967 #ifdef UTF8_OUTPUT_ENABLE
1968         case 'w':           /* UTF-8 output */
1969             if (cp[0] == '8') {
1970                 output_conv = w_oconv; cp++;
1971                 if (cp[0] == '0'){
1972                     cp++;
1973                     output_encoding = nkf_enc_from_index(UTF_8N);
1974                 } else {
1975                     output_bom_f = TRUE;
1976                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1977                 }
1978             } else {
1979                 int enc_idx;
1980                 if ('1'== cp[0] && '6'==cp[1]) {
1981                     output_conv = w_oconv16; cp+=2;
1982                     enc_idx = UTF_16;
1983                 } else if ('3'== cp[0] && '2'==cp[1]) {
1984                     output_conv = w_oconv32; cp+=2;
1985                     enc_idx = UTF_32;
1986                 } else {
1987                     output_conv = w_oconv;
1988                     output_encoding = nkf_enc_from_index(UTF_8);
1989                     continue;
1990                 }
1991                 if (cp[0]=='L') {
1992                     cp++;
1993                     output_endian = ENDIAN_LITTLE;
1994                 } else if (cp[0] == 'B') {
1995                     cp++;
1996                 } else {
1997                     output_encoding = nkf_enc_from_index(enc_idx);
1998                     continue;
1999                 }
2000                 if (cp[0] == '0'){
2001                     cp++;
2002                     enc_idx = enc_idx == UTF_16
2003                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
2004                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
2005                 } else {
2006                     output_bom_f = TRUE;
2007                     enc_idx = enc_idx == UTF_16
2008                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
2009                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
2010                 }
2011                 output_encoding = nkf_enc_from_index(enc_idx);
2012             }
2013             continue;
2014 #endif
2015 #ifdef UTF8_INPUT_ENABLE
2016         case 'W':           /* UTF input */
2017             if (cp[0] == '8') {
2018                 cp++;
2019                 input_f = UTF8_INPUT;
2020             }else{
2021                 if ('1'== cp[0] && '6'==cp[1]) {
2022                     cp += 2;
2023                     input_f = UTF16_INPUT;
2024                     input_endian = ENDIAN_BIG;
2025                 } else if ('3'== cp[0] && '2'==cp[1]) {
2026                     cp += 2;
2027                     input_f = UTF32_INPUT;
2028                     input_endian = ENDIAN_BIG;
2029                 } else {
2030                     input_f = UTF8_INPUT;
2031                     continue;
2032                 }
2033                 if (cp[0]=='L') {
2034                     cp++;
2035                     input_endian = ENDIAN_LITTLE;
2036                 } else if (cp[0] == 'B') {
2037                     cp++;
2038                 }
2039             }
2040             continue;
2041 #endif
2042         /* Input code assumption */
2043         case 'J':   /* JIS input */
2044             input_f = JIS_INPUT;
2045             continue;
2046         case 'E':   /* AT&T EUC input */
2047             input_f = EUC_INPUT;
2048             continue;
2049         case 'S':   /* MS Kanji input */
2050             input_f = SJIS_INPUT;
2051             continue;
2052         case 'Z':   /* Convert X0208 alphabet to asii */
2053             /* alpha_f
2054                bit:0   Convert JIS X 0208 Alphabet to ASCII
2055                bit:1   Convert Kankaku to one space
2056                bit:2   Convert Kankaku to two spaces
2057                bit:3   Convert HTML Entity
2058                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2059             */
2060             while ('0'<= *cp && *cp <='9') {
2061                 alpha_f |= 1 << (*cp++ - '0');
2062             }
2063             if (!alpha_f) alpha_f = 1;
2064             continue;
2065         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2066             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2067             /* accept  X0201
2068                     ESC-(-I     in JIS, EUC, MS Kanji
2069                     SI/SO       in JIS, EUC, MS Kanji
2070                     SSO         in EUC, JIS, not in MS Kanji
2071                     MS Kanji (0xa0-0xdf)
2072                output  X0201
2073                     ESC-(-I     in JIS (0x20-0x5f)
2074                     SSO         in EUC (0xa0-0xdf)
2075                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2076             */
2077             continue;
2078         case 'X':   /* Convert X0201 kana to X0208 */
2079             x0201_f = TRUE;
2080             continue;
2081         case 'F':   /* prserve new lines */
2082             fold_preserve_f = TRUE;
2083         case 'f':   /* folding -f60 or -f */
2084             fold_f = TRUE;
2085             fold_len = 0;
2086             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2087                 fold_len *= 10;
2088                 fold_len += *cp++ - '0';
2089             }
2090             if (!(0<fold_len && fold_len<BUFSIZ))
2091                 fold_len = DEFAULT_FOLD;
2092             if (*cp=='-') {
2093                 fold_margin = 0;
2094                 cp++;
2095                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2096                     fold_margin *= 10;
2097                     fold_margin += *cp++ - '0';
2098                 }
2099             }
2100             continue;
2101         case 'm':   /* MIME support */
2102             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2103             if (*cp=='B'||*cp=='Q') {
2104                 mime_decode_mode = *cp++;
2105                 mimebuf_f = FIXED_MIME;
2106             } else if (*cp=='N') {
2107                 mime_f = TRUE; cp++;
2108             } else if (*cp=='S') {
2109                 mime_f = STRICT_MIME; cp++;
2110             } else if (*cp=='0') {
2111                 mime_decode_f = FALSE;
2112                 mime_f = FALSE; cp++;
2113             }
2114             continue;
2115         case 'M':   /* MIME output */
2116             if (*cp=='B') {
2117                 mimeout_mode = 'B';
2118                 mimeout_f = FIXED_MIME; cp++;
2119             } else if (*cp=='Q') {
2120                 mimeout_mode = 'Q';
2121                 mimeout_f = FIXED_MIME; cp++;
2122             } else {
2123                 mimeout_f = TRUE;
2124             }
2125             continue;
2126         case 'B':   /* Broken JIS support */
2127             /*  bit:0   no ESC JIS
2128                 bit:1   allow any x on ESC-(-x or ESC-$-x
2129                 bit:2   reset to ascii on NL
2130             */
2131             if ('9'>= *cp && *cp>='0')
2132                 broken_f |= 1<<(*cp++ -'0');
2133             else
2134                 broken_f |= TRUE;
2135             continue;
2136 #ifndef PERL_XS
2137         case 'O':/* for Output file */
2138             file_out_f = TRUE;
2139             continue;
2140 #endif
2141         case 'c':/* add cr code */
2142             nlmode_f = CRLF;
2143             continue;
2144         case 'd':/* delete cr code */
2145             nlmode_f = LF;
2146             continue;
2147         case 'I':   /* ISO-2022-JP output */
2148             iso2022jp_f = TRUE;
2149             continue;
2150         case 'L':  /* line mode */
2151             if (*cp=='u') {         /* unix */
2152                 nlmode_f = LF; cp++;
2153             } else if (*cp=='m') { /* mac */
2154                 nlmode_f = CR; cp++;
2155             } else if (*cp=='w') { /* windows */
2156                 nlmode_f = CRLF; cp++;
2157             } else if (*cp=='0') { /* no conversion  */
2158                 nlmode_f = 0; cp++;
2159             }
2160             continue;
2161 #ifndef PERL_XS
2162         case 'g':
2163             if (*cp == '1') {
2164                 guess_f = 2;
2165                 cp++;
2166             } else if (*cp == '0') {
2167                 guess_f = 1;
2168                 cp++;
2169             } else {
2170                 guess_f = 1;
2171             }
2172             continue;
2173 #endif
2174         case SP:
2175         /* module muliple options in a string are allowed for Perl moudle  */
2176             while(*cp && *cp++!='-');
2177             continue;
2178         default:
2179             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2180             /* bogus option but ignored */
2181             continue;
2182         }
2183     }
2184 }
2185
2186 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2187 {
2188     if (iconv_func){
2189         struct input_code *p = input_code_list;
2190         while (p->name){
2191             if (iconv_func == p->iconv_func){
2192                 return p;
2193             }
2194             p++;
2195         }
2196     }
2197     return 0;
2198 }
2199
2200 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2201 {
2202 #ifdef INPUT_CODE_FIX
2203     if (f || !input_f)
2204 #endif
2205         if (estab_f != f){
2206             estab_f = f;
2207         }
2208
2209     if (iconv_func
2210 #ifdef INPUT_CODE_FIX
2211         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
2212 #endif
2213         ){
2214         iconv = iconv_func;
2215     }
2216 #ifdef CHECK_OPTION
2217     if (estab_f && iconv_for_check != iconv){
2218         struct input_code *p = find_inputcode_byfunc(iconv);
2219         if (p){
2220             set_input_codename(p->name);
2221             debug(p->name);
2222         }
2223         iconv_for_check = iconv;
2224     }
2225 #endif
2226 }
2227
/*
 * Penalty bits that set_code_score() ORs into input_code.score.
 * Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* reinterpreted via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* non-existent character */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score every detector starts from (see status_reset()). */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score for a lead byte with (c2 & 0x70) == 0x20, indexed by its low
 * nibble (see code_score()).
 *
 * The element type is unsigned char: SCORE_ERROR is 128, which does not
 * fit in a signed char and would otherwise turn into a negative value
 * that sets every higher bit when ORed into the score.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score for a lead byte with (c2 & 0x70) == 0x70, indexed by its low
 * nibble (see code_score()).  Unsigned for the reason given above; the
 * last entry is SCORE_ERROR.
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2252
2253 void set_code_score(struct input_code *ptr, nkf_char score)
2254 {
2255     if (ptr){
2256         ptr->score |= score;
2257     }
2258 }
2259
2260 void clr_code_score(struct input_code *ptr, nkf_char score)
2261 {
2262     if (ptr){
2263         ptr->score &= ~score;
2264     }
2265 }
2266
2267 void code_score(struct input_code *ptr)
2268 {
2269     nkf_char c2 = ptr->buf[0];
2270 #ifdef UTF8_OUTPUT_ENABLE
2271     nkf_char c1 = ptr->buf[1];
2272 #endif
2273     if (c2 < 0){
2274         set_code_score(ptr, SCORE_ERROR);
2275     }else if (c2 == SSO){
2276         set_code_score(ptr, SCORE_KANA);
2277     }else if (c2 == 0x8f){
2278         set_code_score(ptr, SCORE_X0212);
2279 #ifdef UTF8_OUTPUT_ENABLE
2280     }else if (!e2w_conv(c2, c1)){
2281         set_code_score(ptr, SCORE_NO_EXIST);
2282 #endif
2283     }else if ((c2 & 0x70) == 0x20){
2284         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2285     }else if ((c2 & 0x70) == 0x70){
2286         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2287     }else if ((c2 & 0x70) >= 0x50){
2288         set_code_score(ptr, SCORE_L2);
2289     }
2290 }
2291
2292 void status_disable(struct input_code *ptr)
2293 {
2294     ptr->stat = -1;
2295     ptr->buf[0] = -1;
2296     code_score(ptr);
2297     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2298 }
2299
2300 void status_push_ch(struct input_code *ptr, nkf_char c)
2301 {
2302     ptr->buf[ptr->index++] = c;
2303 }
2304
2305 void status_clear(struct input_code *ptr)
2306 {
2307     ptr->stat = 0;
2308     ptr->index = 0;
2309 }
2310
2311 void status_reset(struct input_code *ptr)
2312 {
2313     status_clear(ptr);
2314     ptr->score = SCORE_INIT;
2315 }
2316
2317 void status_reinit(struct input_code *ptr)
2318 {
2319     status_reset(ptr);
2320     ptr->_file_stat = 0;
2321 }
2322
2323 void status_check(struct input_code *ptr, nkf_char c)
2324 {
2325     if (c <= DEL && estab_f){
2326         status_reset(ptr);
2327     }
2328 }
2329
2330 void s_status(struct input_code *ptr, nkf_char c)
2331 {
2332     switch(ptr->stat){
2333       case -1:
2334           status_check(ptr, c);
2335           break;
2336       case 0:
2337           if (c <= DEL){
2338               break;
2339 #ifdef NUMCHAR_OPTION
2340           }else if (is_unicode_capsule(c)){
2341               break;
2342 #endif
2343           }else if (0xa1 <= c && c <= 0xdf){
2344               status_push_ch(ptr, SSO);
2345               status_push_ch(ptr, c);
2346               code_score(ptr);
2347               status_clear(ptr);
2348           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2349               ptr->stat = 1;
2350               status_push_ch(ptr, c);
2351           }else if (0xed <= c && c <= 0xee){
2352               ptr->stat = 3;
2353               status_push_ch(ptr, c);
2354 #ifdef SHIFTJIS_CP932
2355           }else if (is_ibmext_in_sjis(c)){
2356               ptr->stat = 2;
2357               status_push_ch(ptr, c);
2358 #endif /* SHIFTJIS_CP932 */
2359 #ifdef X0212_ENABLE
2360           }else if (0xf0 <= c && c <= 0xfc){
2361               ptr->stat = 1;
2362               status_push_ch(ptr, c);
2363 #endif /* X0212_ENABLE */
2364           }else{
2365               status_disable(ptr);
2366           }
2367           break;
2368       case 1:
2369           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2370               status_push_ch(ptr, c);
2371               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2372               code_score(ptr);
2373               status_clear(ptr);
2374           }else{
2375               status_disable(ptr);
2376           }
2377           break;
2378       case 2:
2379 #ifdef SHIFTJIS_CP932
2380         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2381             status_push_ch(ptr, c);
2382             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2383                 set_code_score(ptr, SCORE_CP932);
2384                 status_clear(ptr);
2385                 break;
2386             }
2387         }
2388 #endif /* SHIFTJIS_CP932 */
2389         status_disable(ptr);
2390           break;
2391       case 3:
2392           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2393               status_push_ch(ptr, c);
2394               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2395             set_code_score(ptr, SCORE_CP932);
2396             status_clear(ptr);
2397           }else{
2398               status_disable(ptr);
2399           }
2400           break;
2401     }
2402 }
2403
2404 void e_status(struct input_code *ptr, nkf_char c)
2405 {
2406     switch (ptr->stat){
2407       case -1:
2408           status_check(ptr, c);
2409           break;
2410       case 0:
2411           if (c <= DEL){
2412               break;
2413 #ifdef NUMCHAR_OPTION
2414           }else if (is_unicode_capsule(c)){
2415               break;
2416 #endif
2417           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2418               ptr->stat = 1;
2419               status_push_ch(ptr, c);
2420 #ifdef X0212_ENABLE
2421           }else if (0x8f == c){
2422               ptr->stat = 2;
2423               status_push_ch(ptr, c);
2424 #endif /* X0212_ENABLE */
2425           }else{
2426               status_disable(ptr);
2427           }
2428           break;
2429       case 1:
2430           if (0xa1 <= c && c <= 0xfe){
2431               status_push_ch(ptr, c);
2432               code_score(ptr);
2433               status_clear(ptr);
2434           }else{
2435               status_disable(ptr);
2436           }
2437           break;
2438 #ifdef X0212_ENABLE
2439       case 2:
2440           if (0xa1 <= c && c <= 0xfe){
2441               ptr->stat = 1;
2442               status_push_ch(ptr, c);
2443           }else{
2444               status_disable(ptr);
2445           }
2446 #endif /* X0212_ENABLE */
2447     }
2448 }
2449
2450 #ifdef UTF8_INPUT_ENABLE
2451 void w_status(struct input_code *ptr, nkf_char c)
2452 {
2453     switch (ptr->stat){
2454       case -1:
2455           status_check(ptr, c);
2456           break;
2457       case 0:
2458           if (c <= DEL){
2459               break;
2460 #ifdef NUMCHAR_OPTION
2461           }else if (is_unicode_capsule(c)){
2462               break;
2463 #endif
2464           }else if (0xc0 <= c && c <= 0xdf){
2465               ptr->stat = 1;
2466               status_push_ch(ptr, c);
2467           }else if (0xe0 <= c && c <= 0xef){
2468               ptr->stat = 2;
2469               status_push_ch(ptr, c);
2470           }else if (0xf0 <= c && c <= 0xf4){
2471               ptr->stat = 3;
2472               status_push_ch(ptr, c);
2473           }else{
2474               status_disable(ptr);
2475           }
2476           break;
2477       case 1:
2478       case 2:
2479           if (0x80 <= c && c <= 0xbf){
2480               status_push_ch(ptr, c);
2481               if (ptr->index > ptr->stat){
2482                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2483                              && ptr->buf[2] == 0xbf);
2484                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2485                            &ptr->buf[0], &ptr->buf[1]);
2486                   if (!bom){
2487                       code_score(ptr);
2488                   }
2489                   status_clear(ptr);
2490               }
2491           }else{
2492               status_disable(ptr);
2493           }
2494           break;
2495       case 3:
2496         if (0x80 <= c && c <= 0xbf){
2497             if (ptr->index < ptr->stat){
2498                 status_push_ch(ptr, c);
2499             } else {
2500                 status_clear(ptr);
2501             }
2502           }else{
2503               status_disable(ptr);
2504           }
2505           break;
2506     }
2507 }
2508 #endif
2509
2510 void code_status(nkf_char c)
2511 {
2512     int action_flag = 1;
2513     struct input_code *result = 0;
2514     struct input_code *p = input_code_list;
2515     while (p->name){
2516         if (!p->status_func) {
2517             ++p;
2518             continue;
2519         }
2520         if (!p->status_func)
2521             continue;
2522         (p->status_func)(p, c);
2523         if (p->stat > 0){
2524             action_flag = 0;
2525         }else if(p->stat == 0){
2526             if (result){
2527                 action_flag = 0;
2528             }else{
2529                 result = p;
2530             }
2531         }
2532         ++p;
2533     }
2534
2535     if (action_flag){
2536         if (result && !estab_f){
2537             set_iconv(TRUE, result->iconv_func);
2538         }else if (c <= DEL){
2539             struct input_code *ptr = input_code_list;
2540             while (ptr->name){
2541                 status_reset(ptr);
2542                 ++ptr;
2543             }
2544         }
2545     }
2546 }
2547
2548 #ifndef WIN32DLL
2549 nkf_char std_getc(FILE *f)
2550 {
2551     if (std_gc_ndx){
2552         return std_gc_buf[--std_gc_ndx];
2553     }
2554     return getc(f);
2555 }
2556 #endif /*WIN32DLL*/
2557
2558 nkf_char std_ungetc(nkf_char c, FILE *f)
2559 {
2560     if (std_gc_ndx == STD_GC_BUFSIZE){
2561         return EOF;
2562     }
2563     std_gc_buf[std_gc_ndx++] = c;
2564     return c;
2565 }
2566
2567 #ifndef WIN32DLL
2568 void std_putc(nkf_char c)
2569 {
2570     if(c!=EOF)
2571       putchar(c);
2572 }
2573 #endif /*WIN32DLL*/
2574
2575 #if !defined(PERL_XS) && !defined(WIN32DLL)
2576 nkf_char noconvert(FILE *f)
2577 {
2578     nkf_char    c;
2579
2580     if (nop_f == 2)
2581         module_connection();
2582     while ((c = (*i_getc)(f)) != EOF)
2583       (*o_putc)(c);
2584     (*o_putc)(EOF);
2585     return 1;
2586 }
2587 #endif
2588
2589 void module_connection(void)
2590 {
2591     oconv = output_conv;
2592     o_putc = std_putc;
2593
2594     /* replace continucation module, from output side */
2595
2596     /* output redicrection */
2597 #ifdef CHECK_OPTION
2598     if (noout_f || guess_f){
2599         o_putc = no_putc;
2600     }
2601 #endif
2602     if (mimeout_f) {
2603         o_mputc = o_putc;
2604         o_putc = mime_putc;
2605         if (mimeout_f == TRUE) {
2606             o_base64conv = oconv; oconv = base64_conv;
2607         }
2608         /* base64_count = 0; */
2609     }
2610
2611     if (nlmode_f || guess_f) {
2612         o_nlconv = oconv; oconv = nl_conv;
2613     }
2614     if (rot_f) {
2615         o_rot_conv = oconv; oconv = rot_conv;
2616     }
2617     if (iso2022jp_f) {
2618         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2619     }
2620     if (hira_f) {
2621         o_hira_conv = oconv; oconv = hira_conv;
2622     }
2623     if (fold_f) {
2624         o_fconv = oconv; oconv = fold_conv;
2625         f_line = 0;
2626     }
2627     if (alpha_f || x0201_f) {
2628         o_zconv = oconv; oconv = z_conv;
2629     }
2630
2631     i_getc = std_getc;
2632     i_ungetc = std_ungetc;
2633     /* input redicrection */
2634 #ifdef INPUT_OPTION
2635     if (cap_f){
2636         i_cgetc = i_getc; i_getc = cap_getc;
2637         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2638     }
2639     if (url_f){
2640         i_ugetc = i_getc; i_getc = url_getc;
2641         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2642     }
2643 #endif
2644 #ifdef NUMCHAR_OPTION
2645     if (numchar_f){
2646         i_ngetc = i_getc; i_getc = numchar_getc;
2647         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2648     }
2649 #endif
2650 #ifdef UNICODE_NORMALIZATION
2651     if (nfc_f && input_f == UTF8_INPUT){
2652         i_nfc_getc = i_getc; i_getc = nfc_getc;
2653         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2654     }
2655 #endif
2656     if (mime_f && mimebuf_f==FIXED_MIME) {
2657         i_mgetc = i_getc; i_getc = mime_getc;
2658         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2659     }
2660     if (broken_f & 1) {
2661         i_bgetc = i_getc; i_getc = broken_getc;
2662         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2663     }
2664     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2665         set_iconv(-TRUE, e_iconv);
2666     } else if (input_f == SJIS_INPUT) {
2667         set_iconv(-TRUE, s_iconv);
2668 #ifdef UTF8_INPUT_ENABLE
2669     } else if (input_f == UTF8_INPUT) {
2670         set_iconv(-TRUE, w_iconv);
2671     } else if (input_f == UTF16_INPUT) {
2672         set_iconv(-TRUE, w_iconv16);
2673     } else if (input_f == UTF32_INPUT) {
2674         set_iconv(-TRUE, w_iconv32);
2675 #endif
2676     } else {
2677         set_iconv(FALSE, e_iconv);
2678     }
2679
2680     {
2681         struct input_code *p = input_code_list;
2682         while (p->name){
2683             status_reinit(p++);
2684         }
2685     }
2686 }
2687
2688 /*
2689  * Check and Ignore BOM
2690  */
2691 void check_bom(FILE *f)
2692 {
2693     int c2;
2694     switch(c2 = (*i_getc)(f)){
2695     case 0x00:
2696         if((c2 = (*i_getc)(f)) == 0x00){
2697             if((c2 = (*i_getc)(f)) == 0xFE){
2698                 if((c2 = (*i_getc)(f)) == 0xFF){
2699                     if(!input_f){
2700                         set_iconv(TRUE, w_iconv32);
2701                     }
2702                     if (iconv == w_iconv32) {
2703                         input_endian = ENDIAN_BIG;
2704                         return;
2705                     }
2706                     (*i_ungetc)(0xFF,f);
2707                 }else (*i_ungetc)(c2,f);
2708                 (*i_ungetc)(0xFE,f);
2709             }else if(c2 == 0xFF){
2710                 if((c2 = (*i_getc)(f)) == 0xFE){
2711                     if(!input_f){
2712                         set_iconv(TRUE, w_iconv32);
2713                     }
2714                     if (iconv == w_iconv32) {
2715                         input_endian = ENDIAN_2143;
2716                         return;
2717                     }
2718                     (*i_ungetc)(0xFF,f);
2719                 }else (*i_ungetc)(c2,f);
2720                 (*i_ungetc)(0xFF,f);
2721             }else (*i_ungetc)(c2,f);
2722             (*i_ungetc)(0x00,f);
2723         }else (*i_ungetc)(c2,f);
2724         (*i_ungetc)(0x00,f);
2725         break;
2726     case 0xEF:
2727         if((c2 = (*i_getc)(f)) == 0xBB){
2728             if((c2 = (*i_getc)(f)) == 0xBF){
2729                 if(!input_f){
2730                     set_iconv(TRUE, w_iconv);
2731                 }
2732                 if (iconv == w_iconv) {
2733                     return;
2734                 }
2735                 (*i_ungetc)(0xBF,f);
2736             }else (*i_ungetc)(c2,f);
2737             (*i_ungetc)(0xBB,f);
2738         }else (*i_ungetc)(c2,f);
2739         (*i_ungetc)(0xEF,f);
2740         break;
2741     case 0xFE:
2742         if((c2 = (*i_getc)(f)) == 0xFF){
2743             if((c2 = (*i_getc)(f)) == 0x00){
2744                 if((c2 = (*i_getc)(f)) == 0x00){
2745                     if(!input_f){
2746                         set_iconv(TRUE, w_iconv32);
2747                     }
2748                     if (iconv == w_iconv32) {
2749                         input_endian = ENDIAN_3412;
2750                         return;
2751                     }
2752                     (*i_ungetc)(0x00,f);
2753                 }else (*i_ungetc)(c2,f);
2754                 (*i_ungetc)(0x00,f);
2755             }else (*i_ungetc)(c2,f);
2756             if(!input_f){
2757                 set_iconv(TRUE, w_iconv16);
2758             }
2759             if (iconv == w_iconv16) {
2760                 input_endian = ENDIAN_BIG;
2761                 return;
2762             }
2763             (*i_ungetc)(0xFF,f);
2764         }else (*i_ungetc)(c2,f);
2765         (*i_ungetc)(0xFE,f);
2766         break;
2767     case 0xFF:
2768         if((c2 = (*i_getc)(f)) == 0xFE){
2769             if((c2 = (*i_getc)(f)) == 0x00){
2770                 if((c2 = (*i_getc)(f)) == 0x00){
2771                     if(!input_f){
2772                         set_iconv(TRUE, w_iconv32);
2773                     }
2774                     if (iconv == w_iconv32) {
2775                         input_endian = ENDIAN_LITTLE;
2776                         return;
2777                     }
2778                     (*i_ungetc)(0x00,f);
2779                 }else (*i_ungetc)(c2,f);
2780                 (*i_ungetc)(0x00,f);
2781             }else (*i_ungetc)(c2,f);
2782             if(!input_f){
2783                 set_iconv(TRUE, w_iconv16);
2784             }
2785             if (iconv == w_iconv16) {
2786                 input_endian = ENDIAN_LITTLE;
2787                 return;
2788             }
2789             (*i_ungetc)(0xFE,f);
2790         }else (*i_ungetc)(c2,f);
2791         (*i_ungetc)(0xFF,f);
2792         break;
2793     default:
2794         (*i_ungetc)(c2,f);
2795         break;
2796     }
2797 }
2798
2799 /*
2800    Conversion main loop. Code detection only.
2801  */
2802
2803 nkf_char kanji_convert(FILE *f)
2804 {
2805     nkf_char    c3, c2=0, c1, c0=0;
2806     int is_8bit = FALSE;
2807
2808     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2809 #ifdef UTF8_INPUT_ENABLE
2810        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2811 #endif
2812       ){
2813         is_8bit = TRUE;
2814     }
2815
2816     input_mode = ASCII;
2817     output_mode = ASCII;
2818     shift_mode = FALSE;
2819
2820 #define NEXT continue      /* no output, get next */
2821 #define SEND ;             /* output c1 and c2, get next */
2822 #define LAST break         /* end of loop, go closing  */
2823
2824     module_connection();
2825     check_bom(f);
2826
2827     while ((c1 = (*i_getc)(f)) != EOF) {
2828 #ifdef INPUT_CODE_FIX
2829         if (!input_f)
2830 #endif
2831             code_status(c1);
2832         if (c2) {
2833             /* second byte */
2834             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2835                 /* in case of 8th bit is on */
2836                 if (!estab_f&&!mime_decode_mode) {
2837                     /* in case of not established yet */
2838                     /* It is still ambiguious */
2839                     if (h_conv(f, c2, c1)==EOF)
2840                         LAST;
2841                     else
2842                         c2 = 0;
2843                     NEXT;
2844                 } else {
2845                     /* in case of already established */
2846                     if (c1 < AT) {
2847                         /* ignore bogus code and not CP5022x UCD */
2848                         c2 = 0;
2849                         NEXT;
2850                     } else {
2851                         SEND;
2852                     }
2853                 }
2854             } else
2855                 /* second byte, 7 bit code */
2856                 /* it might be kanji shitfted */
2857                 if ((c1 == DEL) || (c1 <= SP)) {
2858                     /* ignore bogus first code */
2859                     c2 = 0;
2860                     NEXT;
2861                 } else
2862                     SEND;
2863         } else {
2864             /* first byte */
2865 #ifdef UTF8_INPUT_ENABLE
2866             if (iconv == w_iconv16) {
2867                 if (input_endian == ENDIAN_BIG) {
2868                     c2 = c1;
2869                     if ((c1 = (*i_getc)(f)) != EOF) {
2870                         if (0xD8 <= c2 && c2 <= 0xDB) {
2871                             if ((c0 = (*i_getc)(f)) != EOF) {
2872                                 c0 <<= 8;
2873                                 if ((c3 = (*i_getc)(f)) != EOF) {
2874                                     c0 |= c3;
2875                                 } else c2 = EOF;
2876                             } else c2 = EOF;
2877                         }
2878                     } else c2 = EOF;
2879                 } else {
2880                     if ((c2 = (*i_getc)(f)) != EOF) {
2881                         if (0xD8 <= c2 && c2 <= 0xDB) {
2882                             if ((c3 = (*i_getc)(f)) != EOF) {
2883                                 if ((c0 = (*i_getc)(f)) != EOF) {
2884                                     c0 <<= 8;
2885                                     c0 |= c3;
2886                                 } else c2 = EOF;
2887                             } else c2 = EOF;
2888                         }
2889                     } else c2 = EOF;
2890                 }
2891                 SEND;
2892             } else if(iconv == w_iconv32){
2893                 int c3 = c1;
2894                 if((c2 = (*i_getc)(f)) != EOF &&
2895                    (c1 = (*i_getc)(f)) != EOF &&
2896                    (c0 = (*i_getc)(f)) != EOF){
2897                     switch(input_endian){
2898                     case ENDIAN_BIG:
2899                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2900                         break;
2901                     case ENDIAN_LITTLE:
2902                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2903                         break;
2904                     case ENDIAN_2143:
2905                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2906                         break;
2907                     case ENDIAN_3412:
2908                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2909                         break;
2910                     }
2911                     c2 = 0;
2912                 }else{
2913                     c2 = EOF;
2914                 }
2915                 SEND;
2916             } else
2917 #endif
2918 #ifdef NUMCHAR_OPTION
2919             if (is_unicode_capsule(c1)){
2920                 SEND;
2921             } else
2922 #endif
2923             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2924                 /* 8 bit code */
2925                 if (!estab_f && !iso8859_f) {
2926                     /* not established yet */
2927                     c2 = c1;
2928                     NEXT;
2929                 } else { /* estab_f==TRUE */
2930                     if (iso8859_f) {
2931                         c2 = ISO_8859_1;
2932                         c1 &= 0x7f;
2933                         SEND;
2934                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2935                         /* SJIS X0201 Case... */
2936                         if (iso2022jp_f && !x0201_f) {
2937                             (*oconv)(GETA1, GETA2);
2938                             NEXT;
2939                         } else {
2940                             c2 = JIS_X_0201;
2941                             c1 &= 0x7f;
2942                             SEND;
2943                         }
2944                     } else if (c1==SSO && iconv != s_iconv) {
2945                         /* EUC X0201 Case */
2946                         c1 = (*i_getc)(f);  /* skip SSO */
2947                         code_status(c1);
2948                         if (SSP<=c1 && c1<0xe0) {
2949                             if (iso2022jp_f && !x0201_f) {
2950                                 (*oconv)(GETA1, GETA2);
2951                                 NEXT;
2952                             } else {
2953                                 c2 = JIS_X_0201;
2954                                 c1 &= 0x7f;
2955                                 SEND;
2956                             }
2957                         } else  { /* bogus code, skip SSO and one byte */
2958                             NEXT;
2959                         }
2960                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2961                                (c1 == 0xFD || c1 == 0xFE)) {
2962                         /* CP10001 */
2963                         c2 = JIS_X_0201;
2964                         c1 &= 0x7f;
2965                         SEND;
2966                     } else {
2967                        /* already established */
2968                        c2 = c1;
2969                        NEXT;
2970                     }
2971                 }
2972             } else if ((c1 > SP) && (c1 != DEL)) {
2973                 /* in case of Roman characters */
2974                 if (shift_mode) {
2975                     /* output 1 shifted byte */
2976                     if (iso8859_f) {
2977                         c2 = ISO_8859_1;
2978                         SEND;
2979                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2980                       /* output 1 shifted byte */
2981                         if (iso2022jp_f && !x0201_f) {
2982                             (*oconv)(GETA1, GETA2);
2983                             NEXT;
2984                         } else {
2985                             c2 = JIS_X_0201;
2986                             SEND;
2987                         }
2988                     } else {
2989                         /* look like bogus code */
2990                         NEXT;
2991                     }
2992                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2993                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2994                     /* in case of Kanji shifted */
2995                     c2 = c1;
2996                     NEXT;
2997                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2998                     /* Check MIME code */
2999                     if ((c1 = (*i_getc)(f)) == EOF) {
3000                         (*oconv)(0, '=');
3001                         LAST;
3002                     } else if (c1 == '?') {
3003                         /* =? is mime conversion start sequence */
3004                         if(mime_f == STRICT_MIME) {
3005                             /* check in real detail */
3006                             if (mime_begin_strict(f) == EOF)
3007                                 LAST;
3008                             else
3009                                 NEXT;
3010                         } else if (mime_begin(f) == EOF)
3011                             LAST;
3012                         else
3013                             NEXT;
3014                     } else {
3015                         (*oconv)(0, '=');
3016                         (*i_ungetc)(c1,f);
3017                         NEXT;
3018                     }
3019                 } else {
3020                     /* normal ASCII code */
3021                     SEND;
3022                 }
3023             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
3024                 shift_mode = FALSE;
3025                 NEXT;
3026             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
3027                 shift_mode = TRUE;
3028                 NEXT;
3029             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
3030                 if ((c1 = (*i_getc)(f)) == EOF) {
3031                     /*  (*oconv)(0, ESC); don't send bogus code */
3032                     LAST;
3033                 } else if (c1 == '$') {
3034                     if ((c1 = (*i_getc)(f)) == EOF) {
3035                         /*
3036                         (*oconv)(0, ESC); don't send bogus code
3037                         (*oconv)(0, '$'); */
3038                         LAST;
3039                     } else if (c1 == '@'|| c1 == 'B') {
3040                         /* This is kanji introduction */
3041                         input_mode = JIS_X_0208;
3042                         shift_mode = FALSE;
3043                         set_input_codename("ISO-2022-JP");
3044 #ifdef CHECK_OPTION
3045                         debug("ISO-2022-JP");
3046 #endif
3047                         NEXT;
3048                     } else if (c1 == '(') {
3049                         if ((c1 = (*i_getc)(f)) == EOF) {
3050                             /* don't send bogus code
3051                             (*oconv)(0, ESC);
3052                             (*oconv)(0, '$');
3053                             (*oconv)(0, '(');
3054                                 */
3055                             LAST;
3056                         } else if (c1 == '@'|| c1 == 'B') {
3057                             /* This is kanji introduction */
3058                             input_mode = JIS_X_0208;
3059                             shift_mode = FALSE;
3060                             NEXT;
3061 #ifdef X0212_ENABLE
3062                         } else if (c1 == 'D'){
3063                             input_mode = JIS_X_0212;
3064                             shift_mode = FALSE;
3065                             NEXT;
3066 #endif /* X0212_ENABLE */
3067                         } else if (c1 == 0x4F){
3068                             input_mode = JIS_X_0213_1;
3069                             shift_mode = FALSE;
3070                             NEXT;
3071                         } else if (c1 == 0x50){
3072                             input_mode = JIS_X_0213_2;
3073                             shift_mode = FALSE;
3074                             NEXT;
3075                         } else {
3076                             /* could be some special code */
3077                             (*oconv)(0, ESC);
3078                             (*oconv)(0, '$');
3079                             (*oconv)(0, '(');
3080                             (*oconv)(0, c1);
3081                             NEXT;
3082                         }
3083                     } else if (broken_f&0x2) {
3084                         /* accept any ESC-(-x as broken code ... */
3085                         input_mode = JIS_X_0208;
3086                         shift_mode = FALSE;
3087                         NEXT;
3088                     } else {
3089                         (*oconv)(0, ESC);
3090                         (*oconv)(0, '$');
3091                         (*oconv)(0, c1);
3092                         NEXT;
3093                     }
3094                 } else if (c1 == '(') {
3095                     if ((c1 = (*i_getc)(f)) == EOF) {
3096                         /* don't send bogus code
3097                         (*oconv)(0, ESC);
3098                         (*oconv)(0, '('); */
3099                         LAST;
3100                     } else {
3101                         if (c1 == 'I') {
3102                             /* This is X0201 kana introduction */
3103                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3104                             NEXT;
3105                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3106                             /* This is X0208 kanji introduction */
3107                             input_mode = ASCII; shift_mode = FALSE;
3108                             NEXT;
3109                         } else if (broken_f&0x2) {
3110                             input_mode = ASCII; shift_mode = FALSE;
3111                             NEXT;
3112                         } else {
3113                             (*oconv)(0, ESC);
3114                             (*oconv)(0, '(');
3115                             /* maintain various input_mode here */
3116                             SEND;
3117                         }
3118                     }
3119                } else if ( c1 == 'N' || c1 == 'n'){
3120                    /* SS2 */
3121                    c3 = (*i_getc)(f);  /* skip SS2 */
3122                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3123                        c1 = c3;
3124                        c2 = JIS_X_0201;
3125                        SEND;
3126                    }else{
3127                        (*i_ungetc)(c3, f);
3128                        /* lonely ESC  */
3129                        (*oconv)(0, ESC);
3130                        SEND;
3131                    }
3132                 } else {
3133                     /* lonely ESC  */
3134                     (*oconv)(0, ESC);
3135                     SEND;
3136                 }
3137             } else if (c1 == ESC && iconv == s_iconv) {
3138                 /* ESC in Shift_JIS */
3139                 if ((c1 = (*i_getc)(f)) == EOF) {
3140                     /*  (*oconv)(0, ESC); don't send bogus code */
3141                     LAST;
3142                 } else if (c1 == '$') {
3143                     /* J-PHONE emoji */
3144                     if ((c1 = (*i_getc)(f)) == EOF) {
3145                         /*
3146                            (*oconv)(0, ESC); don't send bogus code
3147                            (*oconv)(0, '$'); */
3148                         LAST;
3149                     } else {
3150                         if (('E' <= c1 && c1 <= 'G') ||
3151                             ('O' <= c1 && c1 <= 'Q')) {
3152                             /*
3153                                NUM : 0 1 2 3 4 5
3154                                BYTE: G E F O P Q
3155                                C%7 : 1 6 0 2 3 4
3156                                C%7 : 0 1 2 3 4 5 6
3157                                NUM : 2 0 3 4 5 X 1
3158                              */
3159                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3160                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3161                             while ((c1 = (*i_getc)(f)) != EOF) {
3162                                 if (SP <= c1 && c1 <= 'z') {
3163                                     (*oconv)(0, c1 + c0);
3164                                 } else break; /* c1 == SO */
3165                             }
3166                         }
3167                     }
3168                     if (c1 == EOF) LAST;
3169                     NEXT;
3170                 } else {
3171                     /* lonely ESC  */
3172                     (*oconv)(0, ESC);
3173                     SEND;
3174                 }
3175             } else if (c1 == LF || c1 == CR) {
3176                 if (broken_f&4) {
3177                     input_mode = ASCII; set_iconv(FALSE, 0);
3178                     SEND;
3179                 } else if (mime_decode_f && !mime_decode_mode){
3180                     if (c1 == LF) {
3181                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3182                             i_ungetc(SP,f);
3183                             continue;
3184                         } else {
3185                             i_ungetc(c1,f);
3186                         }
3187                         c1 = LF;
3188                         SEND;
3189                     } else  { /* if (c1 == CR)*/
3190                         if ((c1=(*i_getc)(f))!=EOF) {
3191                             if (c1==SP) {
3192                                 i_ungetc(SP,f);
3193                                 continue;
3194                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3195                                 i_ungetc(SP,f);
3196                                 continue;
3197                             } else {
3198                                 i_ungetc(c1,f);
3199                             }
3200                             i_ungetc(LF,f);
3201                         } else {
3202                             i_ungetc(c1,f);
3203                         }
3204                         c1 = CR;
3205                         SEND;
3206                     }
3207                 }
3208             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3209                 /* CP5022x */
3210                 c2 = c1;
3211                 NEXT;
3212             } else
3213                 SEND;
3214         }
3215         /* send: */
3216         switch(input_mode){
3217         case ASCII:
3218             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3219             case -2:
3220                 /* 4 bytes UTF-8 */
3221                 if ((c0 = (*i_getc)(f)) != EOF) {
3222                     code_status(c0);
3223                     c0 <<= 8;
3224                     if ((c3 = (*i_getc)(f)) != EOF) {
3225                         code_status(c3);
3226                         (*iconv)(c2, c1, c0|c3);
3227                     }
3228                 }
3229                 break;
3230             case -1:
3231                 /* 3 bytes EUC or UTF-8 */
3232                 if ((c0 = (*i_getc)(f)) != EOF) {
3233                     code_status(c0);
3234                     (*iconv)(c2, c1, c0);
3235                 }
3236                 break;
3237             }
3238             break;
3239         case JIS_X_0208:
3240         case JIS_X_0213_1:
3241             if (ms_ucs_map_f &&
3242                 0x7F <= c2 && c2 <= 0x92 &&
3243                 0x21 <= c1 && c1 <= 0x7E) {
3244                 /* CP932 UDC */
3245                 if(c1 == 0x7F) return 0;
3246                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3247                 c2 = 0;
3248             }
3249             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3250             break;
3251 #ifdef X0212_ENABLE
3252         case JIS_X_0212:
3253             (*oconv)(PREFIX_EUCG3 | c2, c1);
3254             break;
3255 #endif /* X0212_ENABLE */
3256         case JIS_X_0213_2:
3257             (*oconv)(PREFIX_EUCG3 | c2, c1);
3258             break;
3259         default:
3260             (*oconv)(input_mode, c1);  /* other special case */
3261         }
3262
3263         c2 = 0;
3264         c0 = 0;
3265         continue;
3266         /* goto next_word */
3267     }
3268
3269     /* epilogue */
3270     (*iconv)(EOF, 0, 0);
3271     if (!input_codename)
3272     {
3273         if (is_8bit) {
3274             struct input_code *p = input_code_list;
3275             struct input_code *result = p;
3276             while (p->name){
3277                 if (p->score < result->score) result = p;
3278                 ++p;
3279             }
3280             set_input_codename(result->name);
3281 #ifdef CHECK_OPTION
3282             debug(result->name);
3283 #endif
3284         }
3285     }
3286     return 1;
3287 }
3288
3289 nkf_char
3290 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3291 {
3292     nkf_char ret, c3, c0;
3293     int hold_index;
3294
3295
3296     /** it must NOT be in the kanji shifte sequence      */
3297     /** it must NOT be written in JIS7                   */
3298     /** and it must be after 2 byte 8bit code            */
3299
3300     hold_count = 0;
3301     push_hold_buf(c2);
3302     push_hold_buf(c1);
3303
3304     while ((c1 = (*i_getc)(f)) != EOF) {
3305         if (c1 == ESC){
3306             (*i_ungetc)(c1,f);
3307             break;
3308         }
3309         code_status(c1);
3310         if (push_hold_buf(c1) == EOF || estab_f){
3311             break;
3312         }
3313     }
3314
3315     if (!estab_f){
3316         struct input_code *p = input_code_list;
3317         struct input_code *result = p;
3318         if (c1 == EOF){
3319             code_status(c1);
3320         }
3321         while (p->name){
3322             if (p->status_func && p->score < result->score){
3323                 result = p;
3324             }
3325             ++p;
3326         }
3327         set_iconv(TRUE, result->iconv_func);
3328     }
3329
3330
3331     /** now,
3332      ** 1) EOF is detected, or
3333      ** 2) Code is established, or
3334      ** 3) Buffer is FULL (but last word is pushed)
3335      **
3336      ** in 1) and 3) cases, we continue to use
3337      ** Kanji codes by oconv and leave estab_f unchanged.
3338      **/
3339
3340     ret = c1;
3341     hold_index = 0;
3342     while (hold_index < hold_count){
3343         c2 = hold_buf[hold_index++];
3344         if (c2 <= DEL
3345 #ifdef NUMCHAR_OPTION
3346             || is_unicode_capsule(c2)
3347 #endif
3348             ){
3349             (*iconv)(0, c2, 0);
3350             continue;
3351         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3352             (*iconv)(JIS_X_0201, c2, 0);
3353             continue;
3354         }
3355         if (hold_index < hold_count){
3356             c1 = hold_buf[hold_index++];
3357         }else{
3358             c1 = (*i_getc)(f);
3359             if (c1 == EOF){
3360                 c3 = EOF;
3361                 break;
3362             }
3363             code_status(c1);
3364         }
3365         c0 = 0;
3366         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3367         case -2:
3368             /* 4 bytes UTF-8 */
3369             if (hold_index < hold_count){
3370                 c0 = hold_buf[hold_index++];
3371             } else if ((c0 = (*i_getc)(f)) == EOF) {
3372                 ret = EOF;
3373                 break;
3374             } else {
3375                 code_status(c0);
3376                 c0 <<= 8;
3377                 if (hold_index < hold_count){
3378                     c3 = hold_buf[hold_index++];
3379                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3380                     c0 = ret = EOF;
3381                     break;
3382                 } else {
3383                     code_status(c3);
3384                     (*iconv)(c2, c1, c0|c3);
3385                 }
3386             }
3387             break;
3388         case -1:
3389             /* 3 bytes EUC or UTF-8 */
3390             if (hold_index < hold_count){
3391                 c0 = hold_buf[hold_index++];
3392             } else if ((c0 = (*i_getc)(f)) == EOF) {
3393                 ret = EOF;
3394                 break;
3395             } else {
3396                 code_status(c0);
3397             }
3398             (*iconv)(c2, c1, c0);
3399             break;
3400         }
3401         if (c0 == EOF) break;
3402     }
3403     return ret;
3404 }
3405
3406 nkf_char push_hold_buf(nkf_char c2)
3407 {
3408     if (hold_count >= HOLD_SIZE*2)
3409         return (EOF);
3410     hold_buf[hold_count++] = (unsigned char)c2;
3411     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3412 }
3413
3414 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3415 {
3416 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3417     nkf_char val;
3418 #endif
3419     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3420 #ifdef SHIFTJIS_CP932
3421     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3422         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3423         if (val){
3424             c2 = val >> 8;
3425             c1 = val & 0xff;
3426         }
3427     }
3428     if (cp932inv_f
3429         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3430         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3431         if (c){
3432             c2 = c >> 8;
3433             c1 = c & 0xff;
3434         }
3435     }
3436 #endif /* SHIFTJIS_CP932 */
3437 #ifdef X0212_ENABLE
3438     if (!x0213_f && is_ibmext_in_sjis(c2)){
3439         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3440         if (val){
3441             if (val > 0x7FFF){
3442                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3443                 c1 = val & 0xff;
3444             }else{
3445                 c2 = val >> 8;
3446                 c1 = val & 0xff;
3447             }
3448             if (p2) *p2 = c2;
3449             if (p1) *p1 = c1;
3450             return 0;
3451         }
3452     }
3453 #endif
3454     if(c2 >= 0x80){
3455         if(x0213_f && c2 >= 0xF0){
3456             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3457                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3458             }else{ /* 78<=k<=94 */
3459                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3460                 if (0x9E < c1) c2++;
3461             }
3462         }else{
3463             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3464             if (0x9E < c1) c2++;
3465         }
3466         if (c1 < 0x9F)
3467             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3468         else {
3469             c1 = c1 - 0x7E;
3470         }
3471     }
3472
3473 #ifdef X0212_ENABLE
3474     c2 = x0212_unshift(c2);
3475 #endif
3476     if (p2) *p2 = c2;
3477     if (p1) *p1 = c1;
3478     return 0;
3479 }
3480
3481 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3482 {
3483     if (c2 == JIS_X_0201) {
3484         c1 &= 0x7f;
3485     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3486         /* NOP */
3487     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3488         /* CP932 UDC */
3489         if(c1 == 0x7F) return 0;
3490         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3491         c2 = 0;
3492     } else {
3493         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3494         if (ret) return ret;
3495     }
3496     (*oconv)(c2, c1);
3497     return 0;
3498 }
3499
3500 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3501 {
3502     if (c2 == JIS_X_0201) {
3503         c1 &= 0x7f;
3504 #ifdef X0212_ENABLE
3505     }else if (c2 == 0x8f){
3506         if (c0 == 0){
3507             return -1;
3508         }
3509         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3510             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3511             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3512             c2 = 0;
3513         } else {
3514             c2 = (c2 << 8) | (c1 & 0x7f);
3515             c1 = c0 & 0x7f;
3516 #ifdef SHIFTJIS_CP932
3517             if (cp51932_f){
3518                 nkf_char s2, s1;
3519                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3520                     s2e_conv(s2, s1, &c2, &c1);
3521                     if (c2 < 0x100){
3522                         c1 &= 0x7f;
3523                         c2 &= 0x7f;
3524                     }
3525                 }
3526             }
3527 #endif /* SHIFTJIS_CP932 */
3528         }
3529 #endif /* X0212_ENABLE */
3530     } else if (c2 == SSO){
3531         c2 = JIS_X_0201;
3532         c1 &= 0x7f;
3533     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3534         /* NOP */
3535     } else {
3536         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3537             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3538             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3539             c2 = 0;
3540         } else {
3541             c1 &= 0x7f;
3542             c2 &= 0x7f;
3543 #ifdef SHIFTJIS_CP932
3544             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3545                 nkf_char s2, s1;
3546                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3547                     s2e_conv(s2, s1, &c2, &c1);
3548                     if (c2 < 0x100){
3549                         c1 &= 0x7f;
3550                         c2 &= 0x7f;
3551                     }
3552                 }
3553             }
3554 #endif /* SHIFTJIS_CP932 */
3555         }
3556     }
3557     (*oconv)(c2, c1);
3558     return 0;
3559 }
3560
3561 #ifdef UTF8_INPUT_ENABLE
3562 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3563 {
3564     nkf_char ret = 0;
3565
3566     if (!c1){
3567         *p2 = 0;
3568         *p1 = c2;
3569     }else if (0xc0 <= c2 && c2 <= 0xef) {
3570         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3571 #ifdef NUMCHAR_OPTION
3572         if (ret > 0){
3573             if (p2) *p2 = 0;
3574             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3575             ret = 0;
3576         }
3577 #endif
3578     }
3579     return ret;
3580 }
3581
3582 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3583 {
3584     nkf_char ret = 0;
3585     static const char w_iconv_utf8_1st_byte[] =
3586     { /* 0xC0 - 0xFF */
3587         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3588         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3589         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3590         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3591
3592     if (c2 < 0 || 0xff < c2) {
3593     }else if (c2 == 0) { /* 0 : 1 byte*/
3594         c0 = 0;
3595     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3596         return 0;
3597     } else{
3598         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3599         case 21:
3600             if (c1 < 0x80 || 0xBF < c1) return 0;
3601             break;
3602         case 30:
3603             if (c0 == 0) return -1;
3604             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3605                 return 0;
3606             break;
3607         case 31:
3608         case 33:
3609             if (c0 == 0) return -1;
3610             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3611                 return 0;
3612             break;
3613         case 32:
3614             if (c0 == 0) return -1;
3615             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3616                 return 0;
3617             break;
3618         case 40:
3619             if (c0 == 0) return -2;
3620             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3621                 return 0;
3622             break;
3623         case 41:
3624             if (c0 == 0) return -2;
3625             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3626                 return 0;
3627             break;
3628         case 42:
3629             if (c0 == 0) return -2;
3630             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3631                 return 0;
3632             break;
3633         default:
3634             return 0;
3635             break;
3636         }
3637     }
3638     if (c2 == 0 || c2 == EOF){
3639     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3640         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3641         c2 = 0;
3642     } else {
3643         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3644     }
3645     if (ret == 0){
3646         (*oconv)(c2, c1);
3647     }
3648     return ret;
3649 }
3650 #endif
3651
3652 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3653 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3654 {
3655     val &= VALUE_MASK;
3656     if (val < 0x80){
3657         *p2 = val;
3658         *p1 = 0;
3659         *p0 = 0;
3660     }else if (val < 0x800){
3661         *p2 = 0xc0 | (val >> 6);
3662         *p1 = 0x80 | (val & 0x3f);
3663         *p0 = 0;
3664     } else if (val <= NKF_INT32_C(0xFFFF)) {
3665         *p2 = 0xe0 | (val >> 12);
3666         *p1 = 0x80 | ((val >> 6) & 0x3f);
3667         *p0 = 0x80 | (val        & 0x3f);
3668     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3669         *p2 = 0xe0 |  (val >> 16);
3670         *p1 = 0x80 | ((val >> 12) & 0x3f);
3671         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3672     } else {
3673         *p2 = 0;
3674         *p1 = 0;
3675         *p0 = 0;
3676     }
3677 }
3678 #endif
3679
3680 #ifdef UTF8_INPUT_ENABLE
3681 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3682 {
3683     nkf_char val;
3684     if (c2 >= 0xf8) {
3685         val = -1;
3686     } else if (c2 >= 0xf0){
3687         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3688         val = (c2 & 0x0f) << 18;
3689         val |= (c1 & 0x3f) << 12;
3690         val |= (c0 & 0x3f00) >> 2;
3691         val |= (c0 & 0x3f);
3692     }else if (c2 >= 0xe0){
3693         val = (c2 & 0x0f) << 12;
3694         val |= (c1 & 0x3f) << 6;
3695         val |= (c0 & 0x3f);
3696     }else if (c2 >= 0xc0){
3697         val = (c2 & 0x1f) << 6;
3698         val |= (c1 & 0x3f);
3699     }else{
3700         val = c2;
3701     }
3702     return val;
3703 }
3704
3705 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3706 {
3707     nkf_char c2, c1, c0;
3708     nkf_char ret = 0;
3709     val &= VALUE_MASK;
3710     if (val < 0x80){
3711         *p2 = 0;
3712         *p1 = val;
3713     }else{
3714         w16w_conv(val, &c2, &c1, &c0);
3715         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3716 #ifdef NUMCHAR_OPTION
3717         if (ret > 0){
3718             *p2 = 0;
3719             *p1 = CLASS_UNICODE | val;
3720             ret = 0;
3721         }
3722 #endif
3723     }
3724     return ret;
3725 }
3726 #endif
3727
3728 #ifdef UTF8_INPUT_ENABLE
3729 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3730 {
3731     nkf_char ret = 0;
3732     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3733         (*oconv)(c2, c1);
3734         return 0;
3735     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3736         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3737             return -2;
3738         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3739         c2 = 0;
3740     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3741         /*
3742            return 2;
3743         */
3744         return 1;
3745     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3746     if (ret) return ret;
3747     (*oconv)(c2, c1);
3748     return 0;
3749 }
3750
3751 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3752 {
3753     int ret = 0;
3754
3755     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3756     } else if (is_unicode_bmp(c1)) {
3757         ret = w16e_conv(c1, &c2, &c1);
3758     } else {
3759         c2 = 0;
3760         c1 =  CLASS_UNICODE | c1;
3761     }
3762     if (ret) return ret;
3763     (*oconv)(c2, c1);
3764     return 0;
3765 }
3766
3767 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3768 {
3769     const unsigned short *const *pp;
3770     const unsigned short *const *const *ppp;
3771     static const char no_best_fit_chars_table_C2[] =
3772     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3773         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3774         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3775         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3776     static const char no_best_fit_chars_table_C2_ms[] =
3777     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3778         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3779         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3780         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3781     static const char no_best_fit_chars_table_932_C2[] =
3782     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3783         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3784         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3785         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3786     static const char no_best_fit_chars_table_932_C3[] =
3787     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3788         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3789         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3790         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3791     nkf_char ret = 0;
3792
3793     if(c2 < 0x80){
3794         *p2 = 0;
3795         *p1 = c2;
3796     }else if(c2 < 0xe0){
3797         if(no_best_fit_chars_f){
3798             if(ms_ucs_map_f == UCS_MAP_CP932){
3799                 switch(c2){
3800                 case 0xC2:
3801                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3802                     break;
3803                 case 0xC3:
3804                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3805                     break;
3806                 }
3807             }else if(!cp932inv_f){
3808                 switch(c2){
3809                 case 0xC2:
3810                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3811                     break;
3812                 case 0xC3:
3813                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3814                     break;
3815                 }
3816             }else if(ms_ucs_map_f == UCS_MAP_MS){
3817                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3818             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3819                 switch(c2){
3820                 case 0xC2:
3821                     switch(c1){
3822                     case 0xA2:
3823                     case 0xA3:
3824                     case 0xA5:
3825                     case 0xA6:
3826                     case 0xAC:
3827                     case 0xAF:
3828                     case 0xB8:
3829                         return 1;
3830                     }
3831                     break;
3832                 }
3833             }
3834         }
3835         pp =
3836             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3837             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3838             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3839             utf8_to_euc_2bytes;
3840         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3841     }else if(c0 < 0xF0){
3842         if(no_best_fit_chars_f){
3843             if(ms_ucs_map_f == UCS_MAP_CP932){
3844                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3845             }else if(ms_ucs_map_f == UCS_MAP_MS){
3846                 switch(c2){
3847                 case 0xE2:
3848                     switch(c1){
3849                     case 0x80:
3850                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3851                         break;
3852                     case 0x88:
3853                         if(c0 == 0x92) return 1;
3854                         break;
3855                     }
3856                     break;
3857                 case 0xE3:
3858                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3859                     break;
3860                 }
3861             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3862                 switch(c2){
3863                 case 0xE3:
3864                     switch(c1){
3865                     case 0x82:
3866                             if(c0 == 0x94) return 1;
3867                         break;
3868                     case 0x83:
3869                             if(c0 == 0xBB) return 1;
3870                         break;
3871                     }
3872                     break;
3873                 }
3874             }else{
3875                 switch(c2){
3876                 case 0xE2:
3877                     switch(c1){
3878                     case 0x80:
3879                         if(c0 == 0x95) return 1;
3880                         break;
3881                     case 0x88:
3882                         if(c0 == 0xA5) return 1;
3883                         break;
3884                     }
3885                     break;
3886                 case 0xEF:
3887                     switch(c1){
3888                     case 0xBC:
3889                         if(c0 == 0x8D) return 1;
3890                         break;
3891                     case 0xBD:
3892                         if(c0 == 0x9E && !cp932inv_f) return 1;
3893                         break;
3894                     case 0xBF:
3895                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3896                         break;
3897                     }
3898                     break;
3899                 }
3900             }
3901         }
3902         ppp =
3903             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3904             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3905             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3906             utf8_to_euc_3bytes;
3907         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3908     }else return -1;
3909 #ifdef SHIFTJIS_CP932
3910     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3911         nkf_char s2, s1;
3912         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3913             s2e_conv(s2, s1, p2, p1);
3914         }else{
3915             ret = 1;
3916         }
3917     }
3918 #endif
3919     return ret;
3920 }
3921
3922 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3923 {
3924     nkf_char c2;
3925     const unsigned short *p;
3926     unsigned short val;
3927
3928     if (pp == 0) return 1;
3929
3930     c1 -= 0x80;
3931     if (c1 < 0 || psize <= c1) return 1;
3932     p = pp[c1];
3933     if (p == 0)  return 1;
3934
3935     c0 -= 0x80;
3936     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3937     val = p[c0];
3938     if (val == 0) return 1;
3939     if (no_cp932ext_f && (
3940         (val>>8) == 0x2D || /* NEC special characters */
3941         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3942         )) return 1;
3943
3944     c2 = val >> 8;
3945    if (val > 0x7FFF){
3946         c2 &= 0x7f;
3947         c2 |= PREFIX_EUCG3;
3948     }
3949     if (c2 == SO) c2 = JIS_X_0201;
3950     c1 = val & 0x7f;
3951     if (p2) *p2 = c2;
3952     if (p1) *p1 = c1;
3953     return 0;
3954 }
3955
3956 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3957 {
3958     int shift = 20;
3959     c &= VALUE_MASK;
3960     while(shift >= 0){
3961         if(c >= 1<<shift){
3962             while(shift >= 0){
3963                 (*f)(0, bin2hex(c>>shift));
3964                 shift -= 4;
3965             }
3966         }else{
3967             shift -= 4;
3968         }
3969     }
3970     return;
3971 }
3972
3973 void encode_fallback_html(nkf_char c)
3974 {
3975     (*oconv)(0, '&');
3976     (*oconv)(0, '#');
3977     c &= VALUE_MASK;
3978     if(c >= NKF_INT32_C(1000000))
3979         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3980     if(c >= NKF_INT32_C(100000))
3981         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3982     if(c >= 10000)
3983         (*oconv)(0, 0x30+(c/10000  )%10);
3984     if(c >= 1000)
3985         (*oconv)(0, 0x30+(c/1000   )%10);
3986     if(c >= 100)
3987         (*oconv)(0, 0x30+(c/100    )%10);
3988     if(c >= 10)
3989         (*oconv)(0, 0x30+(c/10     )%10);
3990     if(c >= 0)
3991         (*oconv)(0, 0x30+ c         %10);
3992     (*oconv)(0, ';');
3993     return;
3994 }
3995
3996 void encode_fallback_xml(nkf_char c)
3997 {
3998     (*oconv)(0, '&');
3999     (*oconv)(0, '#');
4000     (*oconv)(0, 'x');
4001     nkf_each_char_to_hex(oconv, c);
4002     (*oconv)(0, ';');
4003     return;
4004 }
4005
4006 void encode_fallback_java(nkf_char c)
4007 {
4008     (*oconv)(0, '\\');
4009     c &= VALUE_MASK;
4010     if(!is_unicode_bmp(c)){
4011         (*oconv)(0, 'U');
4012         (*oconv)(0, '0');
4013         (*oconv)(0, '0');
4014         (*oconv)(0, bin2hex(c>>20));
4015         (*oconv)(0, bin2hex(c>>16));
4016     }else{
4017         (*oconv)(0, 'u');
4018     }
4019     (*oconv)(0, bin2hex(c>>12));
4020     (*oconv)(0, bin2hex(c>> 8));
4021     (*oconv)(0, bin2hex(c>> 4));
4022     (*oconv)(0, bin2hex(c    ));
4023     return;
4024 }
4025
4026 void encode_fallback_perl(nkf_char c)
4027 {
4028     (*oconv)(0, '\\');
4029     (*oconv)(0, 'x');
4030     (*oconv)(0, '{');
4031     nkf_each_char_to_hex(oconv, c);
4032     (*oconv)(0, '}');
4033     return;
4034 }
4035
4036 void encode_fallback_subchar(nkf_char c)
4037 {
4038     c = unicode_subchar;
4039     (*oconv)((c>>8)&0xFF, c&0xFF);
4040     return;
4041 }
4042 #endif
4043
4044 #ifdef UTF8_OUTPUT_ENABLE
4045 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
4046 {
4047     const unsigned short *p;
4048
4049     if (c2 == JIS_X_0201) {
4050         if (ms_ucs_map_f == UCS_MAP_CP10001) {
4051             switch (c1) {
4052             case 0x20:
4053                 return 0xA0;
4054             case 0x7D:
4055                 return 0xA9;
4056             }
4057         }
4058         p = euc_to_utf8_1byte;
4059 #ifdef X0212_ENABLE
4060     } else if (is_eucg3(c2)){
4061         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4062             return 0xA6;
4063         }
4064         c2 = (c2&0x7f) - 0x21;
4065         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4066             p = x0212_to_utf8_2bytes[c2];
4067         else
4068             return 0;
4069 #endif
4070     } else {
4071         c2 &= 0x7f;
4072         c2 = (c2&0x7f) - 0x21;
4073         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4074             p =
4075                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4076                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4077                 euc_to_utf8_2bytes_ms[c2];
4078         else
4079             return 0;
4080     }
4081     if (!p) return 0;
4082     c1 = (c1 & 0x7f) - 0x21;
4083     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4084         return p[c1];
4085     return 0;
4086 }
4087
4088 void w_oconv(nkf_char c2, nkf_char c1)
4089 {
4090     nkf_char c0;
4091     nkf_char val;
4092
4093     if (output_bom_f) {
4094         output_bom_f = FALSE;
4095         (*o_putc)('\357');
4096         (*o_putc)('\273');
4097         (*o_putc)('\277');
4098     }
4099
4100     if (c2 == EOF) {
4101         (*o_putc)(EOF);
4102         return;
4103     }
4104
4105 #ifdef NUMCHAR_OPTION
4106     if (c2 == 0 && is_unicode_capsule(c1)){
4107         val = c1 & VALUE_MASK;
4108         if (val < 0x80){
4109             (*o_putc)(val);
4110         }else if (val < 0x800){
4111             (*o_putc)(0xC0 | (val >> 6));
4112             (*o_putc)(0x80 | (val & 0x3f));
4113         } else if (val <= NKF_INT32_C(0xFFFF)) {
4114             (*o_putc)(0xE0 | (val >> 12));
4115             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4116             (*o_putc)(0x80 | (val        & 0x3f));
4117         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4118             (*o_putc)(0xF0 | ( val>>18));
4119             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4120             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4121             (*o_putc)(0x80 | ( val      & 0x3f));
4122         }
4123         return;
4124     }
4125 #endif
4126
4127     if (c2 == 0) {
4128         output_mode = ASCII;
4129         (*o_putc)(c1);
4130     } else if (c2 == ISO_8859_1) {
4131         output_mode = UTF_8;
4132         (*o_putc)(c1 | 0x080);
4133     } else {
4134         output_mode = UTF_8;
4135         val = e2w_conv(c2, c1);
4136         if (val){
4137             w16w_conv(val, &c2, &c1, &c0);
4138             (*o_putc)(c2);
4139             if (c1){
4140                 (*o_putc)(c1);
4141                 if (c0) (*o_putc)(c0);
4142             }
4143         }
4144     }
4145 }
4146
4147 void w_oconv16(nkf_char c2, nkf_char c1)
4148 {
4149     if (output_bom_f) {
4150         output_bom_f = FALSE;
4151         if (output_endian == ENDIAN_LITTLE){
4152             (*o_putc)((unsigned char)'\377');
4153             (*o_putc)('\376');
4154         }else{
4155             (*o_putc)('\376');
4156             (*o_putc)((unsigned char)'\377');
4157         }
4158     }
4159
4160     if (c2 == EOF) {
4161         (*o_putc)(EOF);
4162         return;
4163     }
4164
4165     if (c2 == ISO_8859_1) {
4166         c2 = 0;
4167         c1 |= 0x80;
4168 #ifdef NUMCHAR_OPTION
4169     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4170         if (is_unicode_bmp(c1)) {
4171             c2 = (c1 >> 8) & 0xff;
4172             c1 &= 0xff;
4173         } else {
4174             c1 &= VALUE_MASK;
4175             if (c1 <= UNICODE_MAX) {
4176                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4177                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4178                 if (output_endian == ENDIAN_LITTLE){
4179                     (*o_putc)(c2 & 0xff);
4180                     (*o_putc)((c2 >> 8) & 0xff);
4181                     (*o_putc)(c1 & 0xff);
4182                     (*o_putc)((c1 >> 8) & 0xff);
4183                 }else{
4184                     (*o_putc)((c2 >> 8) & 0xff);
4185                     (*o_putc)(c2 & 0xff);
4186                     (*o_putc)((c1 >> 8) & 0xff);
4187                     (*o_putc)(c1 & 0xff);
4188                 }
4189             }
4190             return;
4191         }
4192 #endif
4193     } else if (c2) {
4194         nkf_char val = e2w_conv(c2, c1);
4195         c2 = (val >> 8) & 0xff;
4196         c1 = val & 0xff;
4197         if (!val) return;
4198     }
4199     if (output_endian == ENDIAN_LITTLE){
4200         (*o_putc)(c1);
4201         (*o_putc)(c2);
4202     }else{
4203         (*o_putc)(c2);
4204         (*o_putc)(c1);
4205