OSDN Git Service

* fix some errors.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** 連絡先： （株）富士通研究所　ソフト３研　市川　至
5 ** （E-Mail Address: ichikawa@flab.fujitsu.co.jp）
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** 連絡先： 琉球大学情報工学科 河野 真治  mime/X0208 support
9 ** （E-Mail Address: kono@ie.u-ryukyu.ac.jp）
10 ** 連絡先： COW for DOS & Win16 & Win32 & OS/2
11 ** （E-Mail Address: GHG00637@niftyserve.or.jp）
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * 現在、nkf は SourceForge にてメンテナンスが続けられています。
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.159 2007/12/23 07:55:20 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-22"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
/* Newline emission helpers, chosen at compile time from DEFAULT_NEWLINE.
 *   PUT_NEWLINE(func)   calls func once per newline byte.
 *   OCONV_NEWLINE(func) calls func with 0 as the first argument and the
 *                       newline byte as the second.
 * 0x0D0A selects CR followed by LF, 0x0D selects CR alone, and every other
 * value -- including an undefined DEFAULT_NEWLINE, which evaluates to 0 in
 * #if -- selects LF and defines DEFAULT_NEWLINE as 0x0A. */
58 #if DEFAULT_NEWLINE == 0x0D0A /* CR LF */
59 #define PUT_NEWLINE(func) do {\
60     func(0x0D);\
61     func(0x0A);\
62 } while (0)
63 #define OCONV_NEWLINE(func) do {\
64     func(0, 0x0D);\
65     func(0, 0x0A);\
66 } while (0)
67 #elif DEFAULT_NEWLINE == 0x0D /* CR only */
68 #define PUT_NEWLINE(func) func(0x0D)
69 #define OCONV_NEWLINE(func) func(0, 0x0D)
70 #else /* LF (default) */
71 #define DEFAULT_NEWLINE 0x0A
72 #define PUT_NEWLINE(func) func(0x0A)
73 #define OCONV_NEWLINE(func) func(0, 0x0A)
74 #endif /* DEFAULT_NEWLINE */
75 #ifdef HELP_OUTPUT_STDERR
76 #define HELP_OUTPUT stderr
77 #else
78 #define HELP_OUTPUT stdout
79 #endif
80
81 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
82 #define MSDOS
83 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
84 #define __WIN32__
85 #endif
86 #endif
87
88 #ifdef PERL_XS
89 #undef OVERWRITE
90 #endif
91
92 #ifndef PERL_XS
93 #include <stdio.h>
94 #endif
95
96 #include <stdlib.h>
97 #include <string.h>
98
99 #if defined(MSDOS) || defined(__OS2__)
100 #include <fcntl.h>
101 #include <io.h>
102 #if defined(_MSC_VER) || defined(__WATCOMC__)
103 #define mktemp _mktemp
104 #endif
105 #endif
106
107 #ifdef MSDOS
108 #ifdef LSI_C
109 #define setbinmode(fp) fsetbin(fp)
110 #elif defined(__DJGPP__)
111 #include <libc/dosio.h>
112 #define setbinmode(fp) djgpp_setbinmode(fp)
113 #else /* Microsoft C, Turbo C */
114 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
115 #endif
116 #else /* UNIX */
117 #define setbinmode(fp)
118 #endif
119
120 #if defined(__DJGPP__)
121 void  djgpp_setbinmode(FILE *fp)
122 {
123     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
124     int fd, m;
125     fd = fileno(fp);
126     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
127     __file_handle_set(fd, m);
128 }
129 #endif
130
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
133 #else /* BSD */
134 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
135 #endif
136
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
139 #define         EASYWIN
140 #ifndef __WIN16__
141 #define __WIN16__
142 #endif
143 #include <windows.h>
144 #endif
145
/* Extra headers needed only when OVERWRITE is enabled: <sys/stat.h> plus
 * whichever utime header the current platform/compiler provides. */
146 #ifdef OVERWRITE
147 /* added by satoru@isoternet.org */
148 #if defined(__EMX__)
149 #include <sys/types.h>
150 #endif
151 #include <sys/stat.h>
152 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
153 #include <unistd.h>
154 #if defined(__WATCOMC__)
155 #include <sys/utime.h>
156 #else
157 #include <utime.h>
158 #endif
159 #else /* defined(MSDOS) */
160 #ifdef __WIN32__
161 #ifdef __BORLANDC__ /* BCC32 */
162 #include <utime.h>
163 #else /* !defined(__BORLANDC__) */
164 #include <sys/utime.h>
165 #endif /* (__BORLANDC__) */
166 #else /* !defined(__WIN32__) */
167 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
168 #include <sys/utime.h>
169 #elif defined(__TURBOC__) /* BCC */
170 #include <utime.h>
171 #elif defined(LSI_C) /* LSI C: no utime header is included */
172 #endif /* compiler selection for non-Win32 MSDOS builds */
173 #endif /* __WIN32__ */
174 #endif /* !defined(MSDOS) || defined(__DJGPP__) */
175 #endif /* OVERWRITE */
176
177 #define         FALSE   0
178 #define         TRUE    1
179
180 /* state of output_mode and input_mode
181
182    c2           0 means ASCII
183                 JIS_X_0201
184                 ISO_8859_1
185                 JIS_X_0208
186                 EOF      all termination
187    c1           32bit data
188
189  */
190
191 /* Input Assumption */
192
193 #define         JIS_INPUT       4
194 #define         EUC_INPUT      16
195 #define         SJIS_INPUT      5
196 #define         LATIN1_INPUT    6
197 #define         UTF8_INPUT     13
198 #define         UTF16_INPUT    1015
199 #define         UTF32_INPUT    1017
200
201 #define         FIXED_MIME      7
202 #define         STRICT_MIME     8
203
204 /* MIME ENCODE */
205
206
207 /* byte order */
208
209 #define         ENDIAN_BIG      1234
210 #define         ENDIAN_LITTLE   4321
211 #define         ENDIAN_2143     2143
212 #define         ENDIAN_3412     3412
213
214 /* ASCII CODE */
215
216 #define         BS      0x08
217 #define         TAB     0x09
218 #define         LF      0x0a
219 #define         CR      0x0d
220 #define         ESC     0x1b
221 #define         SP      0x20
222 #define         AT      0x40
223 #define         SSP     0xa0
224 #define         DEL     0x7f
225 #define         SI      0x0f
226 #define         SO      0x0e
227 #define         SSO     0x8e
228 #define         SS3     0x8f
229 #define         CRLF    0x0D0A
230
231
232 /* encodings */
233
234 enum nkf_encodings {
235     ASCII,
236     ISO_8859_1,
237     ISO_2022_JP,
238     CP50220,
239     CP50221,
240     CP50222,
241     ISO_2022_JP_1,
242     ISO_2022_JP_3,
243     SHIFT_JIS,
244     WINDOWS_31J,
245     CP10001,
246     EUC_JP,
247     CP51932,
248     EUCJP_MS,
249     EUCJP_ASCII,
250     SHIFT_JISX0213,
251     SHIFT_JIS_2004,
252     EUC_JISX0213,
253     EUC_JIS_2004,
254     UTF_8,
255     UTF_8N,
256     UTF_8_BOM,
257     UTF8_MAC,
258     UTF_16,
259     UTF_16BE,
260     UTF_16BE_BOM,
261     UTF_16LE,
262     UTF_16LE_BOM,
263     UTF_32,
264     UTF_32BE,
265     UTF_32BE_BOM,
266     UTF_32LE,
267     UTF_32LE_BOM,
268     JIS_X_0201=0x1000,
269     JIS_X_0208,
270     JIS_X_0212,
271     JIS_X_0213_1,
272     JIS_X_0213_2,
273     BINARY
274 };
275
276 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
277 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
278 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
279 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
280 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
281 void j_oconv(nkf_char c2, nkf_char c1);
282 void s_oconv(nkf_char c2, nkf_char c1);
283 void e_oconv(nkf_char c2, nkf_char c1);
284 void w_oconv(nkf_char c2, nkf_char c1);
285 void w_oconv16(nkf_char c2, nkf_char c1);
286 void w_oconv32(nkf_char c2, nkf_char c1);
287
/* A native encoding: a display name paired with the input converter
 * (called with c2, c1, c0) and the output converter (called with c2, c1)
 * that implement it. */
288 typedef struct {
289     char *name;
290     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
291     void (*oconv_func)(nkf_char c2, nkf_char c1);
292 } nkf_native_encoding;
293
/* The native encodings. ASCII and EUC-JP share the e_iconv/e_oconv pair;
 * ISO-2022-JP reads through e_iconv but writes through j_oconv. */
294 nkf_native_encoding NkfEncodingASCII =          { "US_ASCII", e_iconv, e_oconv };
295 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
296 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
297 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
298 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
299 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
300 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
301
302 typedef struct {
303     int id;
304     char *name;
305     nkf_native_encoding *based_encoding;
306 } nkf_encoding;
307 nkf_encoding nkf_encoding_table[] = {
308     {ASCII,             "ASCII",                &NkfEncodingASCII},
309     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
310     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingASCII},
311     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
312     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
313     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
314     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
315     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
316     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
317     {WINDOWS_31J,       "WINDOWS-31J",          &NkfEncodingShift_JIS},
318     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
319     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
320     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
321     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
322     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
323     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
324     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
325     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
326     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
327     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
328     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
329     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
330     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
331     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
332     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
333     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
334     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
335     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
336     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
337     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
338     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
339     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
340     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
341     {BINARY,            "BINARY",               &NkfEncodingASCII},
342     {-1,                NULL,                   NULL}
343 };
344 #define NKF_ENCODING_TABLE_SIZE 34
345 struct {
346     const char *name;
347     const int id;
348 } encoding_name_to_id_table[] = {
349     {"ASCII",                   ASCII},
350     {"ISO-2022-JP",             ISO_2022_JP},
351     {"X-ISO2022JP-CP932",       CP50220},
352     {"CP50220",                 CP50220},
353     {"CP50221",                 CP50221},
354     {"CP50222",                 CP50222},
355     {"ISO-2022-JP-1",           ISO_2022_JP_1},
356     {"ISO-2022-JP-3",           ISO_2022_JP_3},
357     {"SHIFT_JIS",               SHIFT_JIS},
358     {"SJIS",                    SHIFT_JIS},
359     {"WINDOWS-31J",             WINDOWS_31J},
360     {"CSWINDOWS31J",            WINDOWS_31J},
361     {"CP932",                   WINDOWS_31J},
362     {"MS932",                   WINDOWS_31J},
363     {"CP10001",                 CP10001},
364     {"EUCJP",                   EUC_JP},
365     {"EUC-JP",                  EUC_JP},
366     {"CP51932",                 CP51932},
367     {"EUC-JP-MS",               EUCJP_MS},
368     {"EUCJP-MS",                EUCJP_MS},
369     {"EUCJPMS",                 EUCJP_MS},
370     {"EUC-JP-ASCII",            EUCJP_ASCII},
371     {"EUCJP-ASCII",             EUCJP_ASCII},
372     {"SHIFT_JISX0213",          SHIFT_JISX0213},
373     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
374     {"EUC-JISX0213",            EUC_JISX0213},
375     {"EUC-JIS-2004",            EUC_JIS_2004},
376     {"UTF-8",                   UTF_8},
377     {"UTF-8N",                  UTF_8N},
378     {"UTF-8-BOM",               UTF_8_BOM},
379     {"UTF8-MAC",                UTF8_MAC},
380     {"UTF-8-MAC",               UTF8_MAC},
381     {"UTF-16",                  UTF_16},
382     {"UTF-16BE",                UTF_16BE},
383     {"UTF-16BE-BOM",            UTF_16BE_BOM},
384     {"UTF-16LE",                UTF_16LE},
385     {"UTF-16LE-BOM",            UTF_16LE_BOM},
386     {"UTF-32",                  UTF_32},
387     {"UTF-32BE",                UTF_32BE},
388     {"UTF-32BE-BOM",            UTF_32BE_BOM},
389     {"UTF-32LE",                UTF_32LE},
390     {"UTF-32LE-BOM",            UTF_32LE_BOM},
391     {"BINARY",                  BINARY},
392     {NULL,                      -1}
393 };
394 #if defined(DEFAULT_CODE_JIS)
395 #define     DEFAULT_ENCODING ISO_2022_JP
396 #elif defined(DEFAULT_CODE_SJIS)
397 #define     DEFAULT_ENCODING SHIFT_JIS
398 #elif defined(DEFAULT_CODE_EUC)
399 #define     DEFAULT_ENCODING EUC_JP
400 #elif defined(DEFAULT_CODE_UTF8)
401 #define     DEFAULT_ENCODING UTF_8
402 #endif
403
404
/* ASCII-only character classification and conversion helpers.
 * Every use of a macro parameter is parenthesized so that expression
 * arguments such as `x & 0xff` or `a | b` are classified as a whole rather
 * than being re-associated by operator precedence. Arguments are still
 * evaluated more than once, so do not pass expressions with side effects. */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (as unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and characters strictly between SP and DEL other than
 * '?', '=', '_', '(', ')', '.' and '"' (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
427
428 #define CP932_TABLE_BEGIN 0xFA
429 #define CP932_TABLE_END   0xFC
430 #define CP932INV_TABLE_BEGIN 0xED
431 #define CP932INV_TABLE_END   0xEE
432 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
433
434 #define         HOLD_SIZE       1024
435 #if defined(INT_IS_SHORT)
436 #define         IOBUF_SIZE      2048
437 #else
438 #define         IOBUF_SIZE      16384
439 #endif
440
441 #define         DEFAULT_J       'B'
442 #define         DEFAULT_R       'B'
443
444 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
445 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
446
447 #define         RANGE_NUM_MAX   18
448 #define         GETA1   0x22
449 #define         GETA2   0x2e
450
451
452 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
453 #define sizeof_euc_to_utf8_1byte 94
454 #define sizeof_euc_to_utf8_2bytes 94
455 #define sizeof_utf8_to_euc_C2 64
456 #define sizeof_utf8_to_euc_E5B8 64
457 #define sizeof_utf8_to_euc_2bytes 112
458 #define sizeof_utf8_to_euc_3bytes 16
459 #endif
460
461 /* MIME preprocessor */
462
463 #ifdef EASYWIN /*Easy Win */
464 extern POINT _BufferSize;
465 #endif
466
/* Per-encoding input record; input_code_list holds one of these for each
 * candidate input encoding, terminated by a zeroed entry.
 * NOTE(review): the meaning of stat, score, index and _file_stat is not
 * visible in this part of the file -- presumably detection state; confirm
 * against status_*() and code_score(). */
467 struct input_code{
468     char *name;         /* encoding name, e.g. "EUC-JP" */
469     nkf_char stat;
470     nkf_char score;
471     nkf_char index;
472     nkf_char buf[3];
473     void (*status_func)(struct input_code *, nkf_char); /* may be NULL (UTF-16/UTF-32 entries) */
474     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter for this encoding */
475     int _file_stat;
476 };
477
478 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
479 static nkf_encoding *output_encoding;
480
481 #if !defined(PERL_XS) && !defined(WIN32DLL)
482 static  nkf_char     noconvert(FILE *f);
483 #endif
484 static  void    module_connection(void);
485 static  nkf_char     kanji_convert(FILE *f);
486 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
487 static  nkf_char     push_hold_buf(nkf_char c2);
488 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
489 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
490 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
491 /* UCS Mapping
492  * 0: Shift_JIS, eucJP-ascii
493  * 1: eucJP-ms
494  * 2: CP932, CP51932
495  * 3: CP10001
496  */
497 #define UCS_MAP_ASCII   0
498 #define UCS_MAP_MS      1
499 #define UCS_MAP_CP932   2
500 #define UCS_MAP_CP10001 3
501 static int ms_ucs_map_f = UCS_MAP_ASCII;
502 #endif
503 #ifdef UTF8_INPUT_ENABLE
504 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
505 static  int     no_cp932ext_f = FALSE;
506 /* ignore ZERO WIDTH NO-BREAK SPACE */
507 static  int     no_best_fit_chars_f = FALSE;
508 static  int     input_endian = ENDIAN_BIG;
509 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
510 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
511 static  void    encode_fallback_html(nkf_char c);
512 static  void    encode_fallback_xml(nkf_char c);
513 static  void    encode_fallback_java(nkf_char c);
514 static  void    encode_fallback_perl(nkf_char c);
515 static  void    encode_fallback_subchar(nkf_char c);
516 static  void    (*encode_fallback)(nkf_char c) = NULL;
517 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
518 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
519 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
520 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
521 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
522 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
523 static  void    w_status(struct input_code *, nkf_char);
524 #endif
525 #ifdef UTF8_OUTPUT_ENABLE
526 static  int     output_bom_f = FALSE;
527 static  int     output_endian = ENDIAN_BIG;
528 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
529 #endif
530 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
531 static  void    fold_conv(nkf_char c2,nkf_char c1);
532 static  void    nl_conv(nkf_char c2,nkf_char c1);
533 static  void    z_conv(nkf_char c2,nkf_char c1);
534 static  void    rot_conv(nkf_char c2,nkf_char c1);
535 static  void    hira_conv(nkf_char c2,nkf_char c1);
536 static  void    base64_conv(nkf_char c2,nkf_char c1);
537 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
538 static  void    no_connection(nkf_char c2,nkf_char c1);
539 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
540
541 static  void    code_score(struct input_code *ptr);
542 static  void    code_status(nkf_char c);
543
544 static  void    std_putc(nkf_char c);
545 static  nkf_char     std_getc(FILE *f);
546 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
547
548 static  nkf_char     broken_getc(FILE *f);
549 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
550
551 static  nkf_char     mime_begin(FILE *f);
552 static  nkf_char     mime_getc(FILE *f);
553 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
554
555 static  void    switch_mime_getc(void);
556 static  void    unswitch_mime_getc(void);
557 static  nkf_char     mime_begin_strict(FILE *f);
558 static  nkf_char     mime_getc_buf(FILE *f);
559 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
560 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
561
562 static  nkf_char     base64decode(nkf_char c);
563 static  void    mime_prechar(nkf_char c2, nkf_char c1);
564 static  void    mime_putc(nkf_char c);
565 static  void    open_mime(nkf_char c);
566 static  void    close_mime(void);
567 static  void    eof_mime(void);
568 static  void    mimeout_addchar(nkf_char c);
569 #ifndef PERL_XS
570 static  void    usage(void);
571 static  void    version(void);
572 static  void    show_configuration(void);
573 #endif
574 static  void    options(unsigned char *c);
575 static  void    reinit(void);
576
577 /* buffers */
578
579 #if !defined(PERL_XS) && !defined(WIN32DLL)
580 static unsigned char   stdibuf[IOBUF_SIZE];
581 static unsigned char   stdobuf[IOBUF_SIZE];
582 #endif
583 static unsigned char   hold_buf[HOLD_SIZE*2];
584 static int             hold_count = 0;
585
586 /* MIME preprocessor fifo */
587
588 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
589 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
590 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
591 static unsigned char           mime_buf[MIME_BUF_SIZE];
592 static unsigned int            mime_top = 0;
593 static unsigned int            mime_last = 0;  /* decoded */
594 static unsigned int            mime_input = 0; /* undecoded */
595 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
596
597 /* flags */
598 static int             unbuf_f = FALSE;
599 static int             estab_f = FALSE;
600 static int             nop_f = FALSE;
601 static int             binmode_f = TRUE;       /* binary mode */
602 static int             rot_f = FALSE;          /* rot14/43 mode; NOTE(review): presumably ROT13/47 -- confirm */
603 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
604 static int             input_f = FALSE;        /* non-fixed input code */
605 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
606 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
607 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
608 static int             mimebuf_f = FALSE;      /* MIME buffered input */
609 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
610 static int             iso8859_f = FALSE;      /* pass ISO8859 through */
611 static int             mimeout_f = FALSE;       /* base64 mode */
612 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
613 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
614
/* Unicode normalization input stage: enable flag, the upstream getc/ungetc
 * pair it reads from (std_getc/std_ungetc by default), and its own
 * getc/ungetc entry points. */
615 #ifdef UNICODE_NORMALIZATION
616 static int nfc_f = FALSE;
617 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc (presumably; mirrors i_cgetc/cap_getc) */
618 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
619 static nkf_char nfc_getc(FILE *f);
620 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
621 #endif
622
623 #ifdef INPUT_OPTION
624 static int cap_f = FALSE;
625 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
626 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
627 static nkf_char cap_getc(FILE *f);
628 static nkf_char cap_ungetc(nkf_char c,FILE *f);
629
630 static int url_f = FALSE;
631 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
632 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
633 static nkf_char url_getc(FILE *f);
634 static nkf_char url_ungetc(nkf_char c,FILE *f);
635 #endif
636
/* NKF_INT32_C(n) spells an integer constant that needs at least 32 bits:
 * when INT_IS_SHORT is defined it appends an L suffix, otherwise the
 * constant is used as written. */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/* The top byte of a character value is a class tag; the low 24 bits carry
 * the value. CLASS_UNICODE tags a value as a Unicode code point. */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* The parameter is parenthesized so that arguments such as
 * `CLASS_UNICODE | cp` are masked as a whole; `&` binds tighter than `|`. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
649
/* Numeric-character input stage: enable flag, the upstream getc/ungetc
 * pair it reads from (std_getc/std_ungetc by default), and its own
 * getc/ungetc entry points. */
650 #ifdef NUMCHAR_OPTION
651 static int numchar_f = FALSE;
652 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of numchar_getc (presumably; mirrors i_cgetc/cap_getc) */
653 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
654 static nkf_char numchar_getc(FILE *f);
655 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
656 #endif
657
658 #ifdef CHECK_OPTION
659 static int noout_f = FALSE;
660 static void no_putc(nkf_char c);
661 static int debug_f = FALSE;
662 static void debug(const char *str);
663 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
664 #endif
665
666 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
667 #if !defined PERL_XS
668 static  void    print_guessed_code(char *filename);
669 #endif
670 static  void    set_input_codename(char *codename);
671
672 #ifdef EXEC_IO
673 static int exec_f = 0;
674 #endif
675
676 #ifdef SHIFTJIS_CP932
677 /* invert IBM extended characters to others */
678 static int cp51932_f = FALSE;
679
680 /* invert NEC-selected IBM extended characters to IBM extended characters */
681 static int cp932inv_f = TRUE;
682
683 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
684 #endif /* SHIFTJIS_CP932 */
685
686 #ifdef X0212_ENABLE
687 static int x0212_f = FALSE;
688 static nkf_char x0212_shift(nkf_char c);
689 static nkf_char x0212_unshift(nkf_char c);
690 #endif
691 static int x0213_f = FALSE;
692
693 static unsigned char prefix_table[256];
694
695 static void set_code_score(struct input_code *ptr, nkf_char score);
696 static void clr_code_score(struct input_code *ptr, nkf_char score);
697 static void status_disable(struct input_code *ptr);
698 static void status_push_ch(struct input_code *ptr, nkf_char c);
699 static void status_clear(struct input_code *ptr);
700 static void status_reset(struct input_code *ptr);
701 static void status_reinit(struct input_code *ptr);
702 static void status_check(struct input_code *ptr, nkf_char c);
703 static void e_status(struct input_code *, nkf_char);
704 static void s_status(struct input_code *, nkf_char);
705
706 struct input_code input_code_list[] = {
707     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
708     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
709 #ifdef UTF8_INPUT_ENABLE
710     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
711     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
712     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
713 #endif
714     {0}
715 };
716
717 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
718 static int              base64_count = 0;
719
720 /* X0208 -> ASCII converter */
721
722 /* fold parameter */
723 static int             f_line = 0;    /* chars in line */
724 static int             f_prev = 0;
725 static int             fold_preserve_f = FALSE; /* preserve new lines */
726 static int             fold_f  = FALSE;
727 static int             fold_len  = 0;
728
729 /* options */
730 static unsigned char   kanji_intro = DEFAULT_J;
731 static unsigned char   ascii_intro = DEFAULT_R;
732
733 /* Folding */
734
735 #define FOLD_MARGIN  10
736 #define DEFAULT_FOLD 60
737
738 static int             fold_margin  = FOLD_MARGIN;
739
740 /* converters */
741
742 #ifdef DEFAULT_CODE_JIS
743 #   define  DEFAULT_CONV j_oconv
744 #endif
745 #ifdef DEFAULT_CODE_SJIS
746 #   define  DEFAULT_CONV s_oconv
747 #endif
748 #ifdef DEFAULT_CODE_EUC
749 #   define  DEFAULT_CONV e_oconv
750 #endif
751 #ifdef DEFAULT_CODE_UTF8
752 #   define  DEFAULT_CONV w_oconv
753 #endif
754
755 /* process default */
756 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
757
758 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
759 /* s_iconv or oconv */
760 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
761
762 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
763 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
764 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
765 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
766 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
767 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
768 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
769
770 /* static redirections */
771
/* Replaceable I/O hooks; all of them start out bound to the std_* routines. */
772 static  void   (*o_putc)(nkf_char c) = std_putc;
773
774 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
775 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
776
777 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc (presumably broken_getc -- confirm) */
778 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
779
780 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
781
782 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
783 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
784
785 /* for strict mime */
786 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
787 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
788
789 /* Global states */
790 static int output_mode = ASCII,    /* output kanji mode */
791            input_mode =  ASCII,    /* input kanji mode */
792            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
793 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
794
/* X0201 / X0208 conversion tables */

/*
 * X0201 kana conversion table.
 * Coverage note carried over from the original: 90-9F A0-DF.
 *
 * The initializer is written as two-byte entries, four entries per row;
 * the last entry is 0x00,0x00.
 * NOTE(review): each entry is presumably an X0208 (c2, c1) pair selected
 * by the X0201 code -- confirm against the code that indexes this table.
 */
static const unsigned char cv[] = {
    0x21,0x21,  0x21,0x23,  0x21,0x56,  0x21,0x57,
    0x21,0x22,  0x21,0x26,  0x25,0x72,  0x25,0x21,
    0x25,0x23,  0x25,0x25,  0x25,0x27,  0x25,0x29,
    0x25,0x63,  0x25,0x65,  0x25,0x67,  0x25,0x43,
    0x21,0x3c,  0x25,0x22,  0x25,0x24,  0x25,0x26,
    0x25,0x28,  0x25,0x2a,  0x25,0x2b,  0x25,0x2d,
    0x25,0x2f,  0x25,0x31,  0x25,0x33,  0x25,0x35,
    0x25,0x37,  0x25,0x39,  0x25,0x3b,  0x25,0x3d,
    0x25,0x3f,  0x25,0x41,  0x25,0x44,  0x25,0x46,
    0x25,0x48,  0x25,0x4a,  0x25,0x4b,  0x25,0x4c,
    0x25,0x4d,  0x25,0x4e,  0x25,0x4f,  0x25,0x52,
    0x25,0x55,  0x25,0x58,  0x25,0x5b,  0x25,0x5e,
    0x25,0x5f,  0x25,0x60,  0x25,0x61,  0x25,0x62,
    0x25,0x64,  0x25,0x66,  0x25,0x68,  0x25,0x69,
    0x25,0x6a,  0x25,0x6b,  0x25,0x6c,  0x25,0x6d,
    0x25,0x6f,  0x25,0x73,  0x21,0x2b,  0x21,0x2c,
    0x00,0x00
};
817
818
/*
 * X0201 kana conversion table for dakuten (voiced sound mark).
 * Coverage note carried over from the original: 90-9F A0-DF.
 *
 * Same layout as a table of two-byte entries; an entry of 0x00,0x00
 * means no value is recorded at that position.
 */
static const unsigned char dv[] = {
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x25,0x74,
    0x00,0x00,  0x00,0x00,  0x25,0x2c,  0x25,0x2e,
    0x25,0x30,  0x25,0x32,  0x25,0x34,  0x25,0x36,
    0x25,0x38,  0x25,0x3a,  0x25,0x3c,  0x25,0x3e,
    0x25,0x40,  0x25,0x42,  0x25,0x45,  0x25,0x47,
    0x25,0x49,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x25,0x50,  0x25,0x53,
    0x25,0x56,  0x25,0x59,  0x25,0x5c,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00
};
839
/*
 * X0201 kana conversion table for han-dakuten (semi-voiced sound mark).
 * Coverage note carried over from the original: 90-9F A0-DF.
 *
 * Written as two-byte entries; only five entries are non-zero.
 */
static const unsigned char ev[] = {
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x25,0x51,  0x25,0x54,
    0x25,0x57,  0x25,0x5a,  0x25,0x5d,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00
};
860
861
/*
 * X0208 kigou (symbol) conversion table.
 * Coverage note carried over from the original: 0x8140 - 0x819e.
 *
 * One byte per entry; 0x00 means no value is recorded at that position.
 * The trailing remarks name the ASCII characters whose codes appear in
 * the row, in order.
 */
static const unsigned char fv[] = {
    0x00, 0x00, 0x00, 0x00, 0x2C, 0x2E, 0x00, 0x3A,  /* comma, period, colon */
    0x3B, 0x3F, 0x21, 0x00, 0x00, 0x27, 0x60, 0x00,  /* semicolon, question, exclamation, apostrophe, backquote */
    0x5E, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x00, 0x00,  /* caret, underscore */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x2F,  /* hyphen, slash */
    0x5C, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x60, 0x27,  /* backslash, vertical bar, backquote, apostrophe */
    0x22, 0x22, 0x28, 0x29, 0x00, 0x00, 0x5B, 0x5D,  /* double quote (twice), parentheses, square brackets */
    0x7B, 0x7D, 0x3C, 0x3E, 0x00, 0x00, 0x00, 0x00,  /* curly braces, less-than, greater-than */
    0x00, 0x00, 0x00, 0x00, 0x2B, 0x2D, 0x00, 0x00,  /* plus, hyphen */
    0x00, 0x3D, 0x00, 0x3C, 0x3E, 0x00, 0x00, 0x00,  /* equals, less-than, greater-than */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x24, 0x00, 0x00, 0x25, 0x23, 0x26, 0x2A, 0x40,  /* dollar, percent, hash, ampersand, asterisk, at */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
879
880
881
882 static int             file_out_f = FALSE;
883 #ifdef OVERWRITE
884 static int             overwrite_f = FALSE;
885 static int             preserve_time_f = FALSE;
886 static int             backup_f = FALSE;
887 static char            *backup_suffix = "";
888 static char *get_backup_filename(const char *suffix, const char *filename);
889 #endif
890
891 static int nlmode_f = 0;   /* CR, LF, CRLF */
892 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
893 static nkf_char prev_cr = 0; /* CR or 0 */
894 #ifdef EASYWIN /*Easy Win */
895 static int             end_check;
896 #endif /*Easy Win */
897
898 #define STD_GC_BUFSIZE (256)
899 nkf_char std_gc_buf[STD_GC_BUFSIZE];
900 nkf_char std_gc_ndx;
901
/*
 * Duplicate a NUL-terminated string on the heap.
 *
 * Returns a malloc()ed copy of str.  If the allocation fails, the failure
 * is reported with perror(str) and the string literal "" is returned
 * instead -- that fallback value is not heap memory.
 */
char* nkf_strcpy(const char *str)
{
    char *copy = malloc(strlen(str) + 1);

    if (copy == NULL) {
        perror(str);
        return "";
    }
    return strcpy(copy, str);
}
912
/*
 * Copy src into dest, converting each character with nkf_toupper().
 *
 *   src    - NUL-terminated input string.
 *   dest   - output buffer of at least `length` bytes.
 *   length - total capacity of dest, including room for the terminator.
 *
 * At most length - 1 characters are copied and dest is always
 * NUL-terminated (nothing is written when length is 0).
 *
 * The loop is driven by the contents of src; the previous version tested
 * dest[i], i.e. whatever happened to be in the uninitialised output
 * buffer, and could store the terminator at dest[length], one byte past
 * the end.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    size_t i = 0;

    if (length == 0) {
        return;
    }
    /* Stop one short of the capacity so the terminator always fits. */
    for (; i + 1 < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
921
922 static nkf_encoding *nkf_enc_from_index(int idx)
923 {
924     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
925         return 0;
926     }
927     return &nkf_encoding_table[idx];
928 }
929
930 static int nkf_enc_find_index(const char *name)
931 {
932     int i, index = -1;
933     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
934         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
935             return encoding_name_to_id_table[i].id;
936         }
937     }
938     return index;
939 }
940
941 static nkf_encoding *nkf_enc_find(const char *name)
942 {
943     int idx = -1;
944     idx = nkf_enc_find_index(name);
945     if (idx < 0) return 0;
946     return nkf_enc_from_index(idx);
947 }
948
/* Member accessors; `enc` must be a pointer to a structure with these fields. */
#define nkf_enc_name(enc)             ((enc)->name)
#define nkf_enc_to_index(enc)         ((enc)->id)
#define nkf_enc_to_base_encoding(enc) ((enc)->based_encoding)
952
953 #ifdef WIN32DLL
954 #include "nkf32dll.c"
955 #elif defined(PERL_XS)
956 #else /* WIN32DLL */
957 int main(int argc, char **argv)
958 {
959     FILE  *fin;
960     unsigned char  *cp;
961
962     char *outfname = NULL;
963     char *origfname;
964
965 #ifdef EASYWIN /*Easy Win */
966     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
967 #endif
968
969     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
970         cp = (unsigned char *)*argv;
971         options(cp);
972         if (guess_f) {
973 #ifdef CHECK_OPTION
974             int debug_f_back = debug_f;
975 #endif
976 #ifdef EXEC_IO
977             int exec_f_back = exec_f;
978 #endif
979 #ifdef X0212_ENABLE
980             int x0212_f_back = x0212_f;
981 #endif
982             int x0213_f_back = x0213_f;
983             int guess_f_back = guess_f;
984             reinit();
985             guess_f = guess_f_back;
986             mime_f = FALSE;
987 #ifdef CHECK_OPTION
988             debug_f = debug_f_back;
989 #endif
990 #ifdef EXEC_IO
991             exec_f = exec_f_back;
992 #endif
993 #ifdef X0212_ENABLE
994             x0212_f = x0212_f_back;
995 #endif
996             x0213_f = x0213_f_back;
997         }
998 #ifdef EXEC_IO
999         if (exec_f){
1000             int fds[2], pid;
1001             if (pipe(fds) < 0 || (pid = fork()) < 0){
1002                 abort();
1003             }
1004             if (pid == 0){
1005                 if (exec_f > 0){
1006                     close(fds[0]);
1007                     dup2(fds[1], 1);
1008                 }else{
1009                     close(fds[1]);
1010                     dup2(fds[0], 0);
1011                 }
1012                 execvp(argv[1], &argv[1]);
1013             }
1014             if (exec_f > 0){
1015                 close(fds[1]);
1016                 dup2(fds[0], 0);
1017             }else{
1018                 close(fds[0]);
1019                 dup2(fds[1], 1);
1020             }
1021             argc = 0;
1022             break;
1023         }
1024 #endif
1025     }
1026
1027     if (binmode_f == TRUE)
1028 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1029     if (freopen("","wb",stdout) == NULL)
1030         return (-1);
1031 #else
1032     setbinmode(stdout);
1033 #endif
1034
1035     if (unbuf_f)
1036       setbuf(stdout, (char *) NULL);
1037     else
1038       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1039
1040     if (argc == 0) {
1041       if (binmode_f == TRUE)
1042 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1043       if (freopen("","rb",stdin) == NULL) return (-1);
1044 #else
1045       setbinmode(stdin);
1046 #endif
1047       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1048       if (nop_f)
1049           noconvert(stdin);
1050       else {
1051           kanji_convert(stdin);
1052           if (guess_f) print_guessed_code(NULL);
1053       }
1054     } else {
1055       int nfiles = argc;
1056         int is_argument_error = FALSE;
1057       while (argc--) {
1058             input_codename = NULL;
1059             input_newline = 0;
1060 #ifdef CHECK_OPTION
1061             iconv_for_check = 0;
1062 #endif
1063           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1064                 perror(*(argv-1));
1065                 is_argument_error = TRUE;
1066                 continue;
1067           } else {
1068 #ifdef OVERWRITE
1069               int fd = 0;
1070               int fd_backup = 0;
1071 #endif
1072
1073 /* reopen file for stdout */
1074               if (file_out_f == TRUE) {
1075 #ifdef OVERWRITE
1076                   if (overwrite_f){
1077                       outfname = malloc(strlen(origfname)
1078                                         + strlen(".nkftmpXXXXXX")
1079                                         + 1);
1080                       if (!outfname){
1081                           perror(origfname);
1082                           return -1;
1083                       }
1084                       strcpy(outfname, origfname);
1085 #ifdef MSDOS
1086                       {
1087                           int i;
1088                           for (i = strlen(outfname); i; --i){
1089                               if (outfname[i - 1] == '/'
1090                                   || outfname[i - 1] == '\\'){
1091                                   break;
1092                               }
1093                           }
1094                           outfname[i] = '\0';
1095                       }
1096                       strcat(outfname, "ntXXXXXX");
1097                       mktemp(outfname);
1098                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1099                                 S_IREAD | S_IWRITE);
1100 #else
1101                       strcat(outfname, ".nkftmpXXXXXX");
1102                       fd = mkstemp(outfname);
1103 #endif
1104                       if (fd < 0
1105                           || (fd_backup = dup(fileno(stdout))) < 0
1106                           || dup2(fd, fileno(stdout)) < 0
1107                           ){
1108                           perror(origfname);
1109                           return -1;
1110                       }
1111                   }else
1112 #endif
1113                   if(argc == 1) {
1114                       outfname = *argv++;
1115                       argc--;
1116                   } else {
1117                       outfname = "nkf.out";
1118                   }
1119
1120                   if(freopen(outfname, "w", stdout) == NULL) {
1121                       perror (outfname);
1122                       return (-1);
1123                   }
1124                   if (binmode_f == TRUE) {
1125 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1126                       if (freopen("","wb",stdout) == NULL)
1127                            return (-1);
1128 #else
1129                       setbinmode(stdout);
1130 #endif
1131                   }
1132               }
1133               if (binmode_f == TRUE)
1134 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1135                  if (freopen("","rb",fin) == NULL)
1136                     return (-1);
1137 #else
1138                  setbinmode(fin);
1139 #endif
1140               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1141               if (nop_f)
1142                   noconvert(fin);
1143               else {
1144                   char *filename = NULL;
1145                   kanji_convert(fin);
1146                   if (nfiles > 1) filename = origfname;
1147                   if (guess_f) print_guessed_code(filename);
1148               }
1149               fclose(fin);
1150 #ifdef OVERWRITE
1151               if (overwrite_f) {
1152                   struct stat     sb;
1153 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1154                   time_t tb[2];
1155 #else
1156                   struct utimbuf  tb;
1157 #endif
1158
1159                   fflush(stdout);
1160                   close(fd);
1161                   if (dup2(fd_backup, fileno(stdout)) < 0){
1162                       perror("dup2");
1163                   }
1164                   if (stat(origfname, &sb)) {
1165                       fprintf(stderr, "Can't stat %s\n", origfname);
1166                   }
1167                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1168                   if (chmod(outfname, sb.st_mode)) {
1169                       fprintf(stderr, "Can't set permission %s\n", outfname);
1170                   }
1171
1172                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1173                     if(preserve_time_f){
1174 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1175                         tb[0] = tb[1] = sb.st_mtime;
1176                         if (utime(outfname, tb)) {
1177                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1178                         }
1179 #else
1180                         tb.actime  = sb.st_atime;
1181                         tb.modtime = sb.st_mtime;
1182                         if (utime(outfname, &tb)) {
1183                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1184                         }
1185 #endif
1186                     }
1187                     if(backup_f){
1188                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1189 #ifdef MSDOS
1190                         unlink(backup_filename);
1191 #endif
1192                         if (rename(origfname, backup_filename)) {
1193                             perror(backup_filename);
1194                             fprintf(stderr, "Can't rename %s to %s\n",
1195                                     origfname, backup_filename);
1196                         }
1197                     }else{
1198 #ifdef MSDOS
1199                         if (unlink(origfname)){
1200                             perror(origfname);
1201                         }
1202 #endif
1203                     }
1204                   if (rename(outfname, origfname)) {
1205                       perror(origfname);
1206                       fprintf(stderr, "Can't rename %s to %s\n",
1207                               outfname, origfname);
1208                   }
1209                   free(outfname);
1210               }
1211 #endif
1212           }
1213       }
1214         if (is_argument_error)
1215             return(-1);
1216     }
1217 #ifdef EASYWIN /*Easy Win */
1218     if (file_out_f == FALSE)
1219         scanf("%d",&end_check);
1220     else
1221         fclose(stdout);
1222 #else /* for Other OS */
1223     if (file_out_f == TRUE)
1224         fclose(stdout);
1225 #endif /*Easy Win */
1226     return (0);
1227 }
1228 #endif /* WIN32DLL */
1229
1230 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the pattern `suffix`.
 *
 *   - If suffix contains one or more '*', every '*' is replaced by
 *     filename and the other characters are copied as they are
 *     (e.g. suffix "*.orig", filename "a.txt" -> "a.txt.orig").
 *   - Otherwise suffix is appended to filename
 *     (e.g. suffix ".bak", filename "a.txt" -> "a.txt.bak").
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when
 * the allocation fails (the failure is reported with perror()).
 *
 * The no-asterisk branch used to call malloc( + 1), i.e. allocate a
 * single byte, and then copied the whole name into it; it now allocates
 * the full length and checks the result.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each '*' (1 byte) turns into filename_length bytes. */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }

        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* + 1 copies the suffix's terminating NUL as well. */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1269 #endif
1270
/*
 * Long ("--name") option table, searched in order by options().
 *
 *   name  - option text after "--".  A name ending in '=' takes a value;
 *           options() compares only the part before the '='.
 *   alias - when non-empty, the short-option string that options()
 *           processes in place of the long option; when empty, options()
 *           handles the option itself by comparing `name`.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",               ""    },
    { "oc=",               ""    },
    { "base64",            "jMB" },
    { "euc",               "e"   },
    { "euc-input",         "E"   },
    { "fj",                "jm"  },
    { "help",              "v"   },
    { "jis",               "j"   },
    { "jis-input",         "J"   },
    { "mac",               "sLm" },
    { "mime",              "jM"  },
    { "mime-input",        "m"   },
    { "msdos",             "sLw" },
    { "sjis",              "s"   },
    { "sjis-input",        "S"   },
    { "unix",              "eLu" },
    { "version",           "V"   },
    { "windows",           "sLw" },
    { "hiragana",          "h1"  },
    { "katakana",          "h2"  },
    { "katakana-hiragana", "h3"  },
    { "guess=",            ""    },
    { "guess",             "g1"  },
    { "cp932",             ""    },
    { "no-cp932",          ""    },
#ifdef X0212_ENABLE
    { "x0212",             ""    },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",              "w"   },
    { "utf16",             "w16" },
    { "ms-ucs-map",        ""    },
    { "fb-skip",           ""    },
    { "fb-html",           ""    },
    { "fb-xml",            ""    },
    { "fb-perl",           ""    },
    { "fb-java",           ""    },
    { "fb-subchar",        ""    },
    { "fb-subchar=",       ""    },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",        "W"   },
    { "utf16-input",       "W16" },
    { "no-cp932ext",       ""    },
    { "no-best-fit-chars", ""    },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",     ""    },
#endif
#ifdef OVERWRITE
    { "overwrite",         ""    },
    { "overwrite=",        ""    },
    { "in-place",          ""    },
    { "in-place=",         ""    },
#endif
#ifdef INPUT_OPTION
    { "cap-input",         ""    },
    { "url-input",         ""    },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",     ""    },
#endif
#ifdef CHECK_OPTION
    { "no-output",         ""    },
    { "debug",             ""    },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",          ""    },
#endif
#ifdef EXEC_IO
    { "exec-in",           ""    },
    { "exec-out",          ""    },
#endif
    { "prefix=",           ""    },
};

/* Set to 1 by options() on a bare "--"; options() then returns at once on every later call. */
static int option_mode = 0;
1352
1353 void options(unsigned char *cp)
1354 {
1355     nkf_char i, j;
1356     unsigned char *p;
1357     unsigned char *cp_back = NULL;
1358     char codeset[32];
1359     nkf_encoding *enc;
1360
1361     if (option_mode==1)
1362         return;
1363     while(*cp && *cp++!='-');
1364     while (*cp || cp_back) {
1365         if(!*cp){
1366             cp = cp_back;
1367             cp_back = NULL;
1368             continue;
1369         }
1370         p = 0;
1371         switch (*cp++) {
1372         case '-':  /* literal options */
1373             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1374                 option_mode = 1;
1375                 return;
1376             }
1377             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1378                 p = (unsigned char *)long_option[i].name;
1379                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1380                 if (*p == cp[j] || cp[j] == SP){
1381                     p = &cp[j] + 1;
1382                     break;
1383                 }
1384                 p = 0;
1385             }
1386             if (p == 0) {
1387                 fprintf(stderr, "unknown long option: --%s\n", cp);
1388                 return;
1389             }
1390             while(*cp && *cp != SP && cp++);
1391             if (long_option[i].alias[0]){
1392                 cp_back = cp;
1393                 cp = (unsigned char *)long_option[i].alias;
1394             }else{
1395                 if (strcmp(long_option[i].name, "ic=") == 0){
1396                     nkf_str_upcase((char *)p, codeset, 32);
1397                     enc = nkf_enc_find(codeset);
1398                     switch (nkf_enc_to_index(enc)) {
1399                     case ISO_2022_JP:
1400                         input_f = JIS_INPUT;
1401                         break;
1402                     case CP50220:
1403                     case CP50221:
1404                     case CP50222:
1405                         input_f = JIS_INPUT;
1406 #ifdef SHIFTJIS_CP932
1407                         cp51932_f = TRUE;
1408 #endif
1409 #ifdef UTF8_OUTPUT_ENABLE
1410                         ms_ucs_map_f = UCS_MAP_CP932;
1411 #endif
1412                         break;
1413                     case ISO_2022_JP_1:
1414                         input_f = JIS_INPUT;
1415 #ifdef X0212_ENABLE
1416                         x0212_f = TRUE;
1417 #endif
1418                         break;
1419                     case ISO_2022_JP_3:
1420                         input_f = JIS_INPUT;
1421 #ifdef X0212_ENABLE
1422                         x0212_f = TRUE;
1423 #endif
1424                         x0213_f = TRUE;
1425                         break;
1426                     case SHIFT_JIS:
1427                         input_f = SJIS_INPUT;
1428                         break;
1429                     case WINDOWS_31J:
1430                         input_f = SJIS_INPUT;
1431 #ifdef SHIFTJIS_CP932
1432                         cp51932_f = TRUE;
1433 #endif
1434 #ifdef UTF8_OUTPUT_ENABLE
1435                         ms_ucs_map_f = UCS_MAP_CP932;
1436 #endif
1437                         break;
1438                     case CP10001:
1439                         input_f = SJIS_INPUT;
1440 #ifdef SHIFTJIS_CP932
1441                         cp51932_f = TRUE;
1442 #endif
1443 #ifdef UTF8_OUTPUT_ENABLE
1444                         ms_ucs_map_f = UCS_MAP_CP10001;
1445 #endif
1446                         break;
1447                     case EUC_JP:
1448                         input_f = EUC_INPUT;
1449                         break;
1450                     case CP51932:
1451                         input_f = EUC_INPUT;
1452 #ifdef SHIFTJIS_CP932
1453                         cp51932_f = TRUE;
1454 #endif
1455 #ifdef UTF8_OUTPUT_ENABLE
1456                         ms_ucs_map_f = UCS_MAP_CP932;
1457 #endif
1458                         break;
1459                     case EUCJP_MS:
1460                         input_f = EUC_INPUT;
1461 #ifdef SHIFTJIS_CP932
1462                         cp51932_f = FALSE;
1463 #endif
1464 #ifdef UTF8_OUTPUT_ENABLE
1465                         ms_ucs_map_f = UCS_MAP_MS;
1466 #endif
1467                         break;
1468                     case EUCJP_ASCII:
1469                         input_f = EUC_INPUT;
1470 #ifdef SHIFTJIS_CP932
1471                         cp51932_f = FALSE;
1472 #endif
1473 #ifdef UTF8_OUTPUT_ENABLE
1474                         ms_ucs_map_f = UCS_MAP_ASCII;
1475 #endif
1476                         break;
1477                     case SHIFT_JISX0213:
1478                     case SHIFT_JIS_2004:
1479                         input_f = SJIS_INPUT;
1480                         x0213_f = TRUE;
1481 #ifdef SHIFTJIS_CP932
1482                         cp51932_f = FALSE;
1483 #endif
1484                         break;
1485                     case EUC_JISX0213:
1486                     case EUC_JIS_2004:
1487                         input_f = EUC_INPUT;
1488                         x0213_f = TRUE;
1489 #ifdef SHIFTJIS_CP932
1490                         cp51932_f = FALSE;
1491 #endif
1492                         break;
1493 #ifdef UTF8_INPUT_ENABLE
1494                     case UTF_8:
1495                     case UTF_8N:
1496                     case UTF_8_BOM:
1497                         input_f = UTF8_INPUT;
1498                         break;
1499 #ifdef UNICODE_NORMALIZATION
1500                     case UTF8_MAC:
1501                         input_f = UTF8_INPUT;
1502                         nfc_f = TRUE;
1503                         break;
1504 #endif
1505                     case UTF_16:
1506                     case UTF_16BE:
1507                     case UTF_16BE_BOM:
1508                         input_f = UTF16_INPUT;
1509                         input_endian = ENDIAN_BIG;
1510                         break;
1511                     case UTF_16LE:
1512                     case UTF_16LE_BOM:
1513                         input_f = UTF16_INPUT;
1514                         input_endian = ENDIAN_LITTLE;
1515                         break;
1516                     case UTF_32:
1517                     case UTF_32BE:
1518                     case UTF_32BE_BOM:
1519                         input_f = UTF32_INPUT;
1520                         input_endian = ENDIAN_BIG;
1521                         break;
1522                     case UTF_32LE:
1523                     case UTF_32LE_BOM:
1524                         input_f = UTF32_INPUT;
1525                         input_endian = ENDIAN_LITTLE;
1526                         break;
1527 #endif
1528                     default:
1529                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1530                         break;
1531                     }
1532                     continue;
1533                 }
1534                 if (strcmp(long_option[i].name, "oc=") == 0){
1535                     x0201_f = FALSE;
1536                     nkf_str_upcase((char *)p, codeset, 32);
1537                     output_encoding = nkf_enc_find(codeset);
1538                     switch (nkf_enc_to_index(output_encoding)) {
1539                     case ISO_2022_JP:
1540                         output_conv = j_oconv;
1541                         break;
1542                     case CP50220:
1543                             output_conv = j_oconv;
1544                             x0201_f = TRUE;
1545 #ifdef SHIFTJIS_CP932
1546                             cp932inv_f = FALSE;
1547 #endif
1548 #ifdef UTF8_OUTPUT_ENABLE
1549                             ms_ucs_map_f = UCS_MAP_CP932;
1550 #endif
1551                         break;
1552                     case CP50221:
1553                         output_conv = j_oconv;
1554 #ifdef SHIFTJIS_CP932
1555                         cp932inv_f = FALSE;
1556 #endif
1557 #ifdef UTF8_OUTPUT_ENABLE
1558                         ms_ucs_map_f = UCS_MAP_CP932;
1559 #endif
1560                         break;
1561                     case ISO_2022_JP_1:
1562                         output_conv = j_oconv;
1563 #ifdef X0212_ENABLE
1564                         x0212_f = TRUE;
1565 #endif
1566 #ifdef SHIFTJIS_CP932
1567                         cp932inv_f = FALSE;
1568 #endif
1569                         break;
1570                     case ISO_2022_JP_3:
1571                         output_conv = j_oconv;
1572 #ifdef X0212_ENABLE
1573                         x0212_f = TRUE;
1574 #endif
1575                         x0213_f = TRUE;
1576 #ifdef SHIFTJIS_CP932
1577                         cp932inv_f = FALSE;
1578 #endif
1579                         break;
1580                     case SHIFT_JIS:
1581                         output_conv = s_oconv;
1582                         break;
1583                     case WINDOWS_31J:
1584                         output_conv = s_oconv;
1585 #ifdef UTF8_OUTPUT_ENABLE
1586                         ms_ucs_map_f = UCS_MAP_CP932;
1587 #endif
1588                         break;
1589                     case CP10001:
1590                         output_conv = s_oconv;
1591 #ifdef UTF8_OUTPUT_ENABLE
1592                         ms_ucs_map_f = UCS_MAP_CP10001;
1593 #endif
1594                         break;
1595                     case EUC_JP:
1596                         output_conv = e_oconv;
1597                         break;
1598                     case CP51932:
1599                         output_conv = e_oconv;
1600 #ifdef SHIFTJIS_CP932
1601                         cp932inv_f = FALSE;
1602 #endif
1603 #ifdef UTF8_OUTPUT_ENABLE
1604                         ms_ucs_map_f = UCS_MAP_CP932;
1605 #endif
1606                         break;
1607                     case EUCJP_MS:
1608                         output_conv = e_oconv;
1609 #ifdef X0212_ENABLE
1610                         x0212_f = TRUE;
1611 #endif
1612 #ifdef UTF8_OUTPUT_ENABLE
1613                         ms_ucs_map_f = UCS_MAP_MS;
1614 #endif
1615                         break;
1616                     case EUCJP_ASCII:
1617                         output_conv = e_oconv;
1618 #ifdef X0212_ENABLE
1619                         x0212_f = TRUE;
1620 #endif
1621 #ifdef UTF8_OUTPUT_ENABLE
1622                         ms_ucs_map_f = UCS_MAP_ASCII;
1623 #endif
1624                         break;
1625                     case SHIFT_JISX0213:
1626                     case SHIFT_JIS_2004:
1627                             output_conv = s_oconv;
1628                             x0213_f = TRUE;
1629 #ifdef SHIFTJIS_CP932
1630                             cp932inv_f = FALSE;
1631 #endif
1632                         break;
1633                     case EUC_JISX0213:
1634                     case EUC_JIS_2004:
1635                         output_conv = e_oconv;
1636 #ifdef X0212_ENABLE
1637                         x0212_f = TRUE;
1638 #endif
1639                         x0213_f = TRUE;
1640 #ifdef SHIFTJIS_CP932
1641                         cp932inv_f = FALSE;
1642 #endif
1643                         break;
1644 #ifdef UTF8_OUTPUT_ENABLE
1645                     case UTF_8:
1646                     case UTF_8N:
1647                         output_conv = w_oconv;
1648                         break;
1649                     case UTF_8_BOM:
1650                         output_conv = w_oconv;
1651                         output_bom_f = TRUE;
1652                         break;
1653                     case UTF_16BE:
1654                         output_conv = w_oconv16;
1655                         break;
1656                     case UTF_16:
1657                     case UTF_16BE_BOM:
1658                         output_conv = w_oconv16;
1659                         output_bom_f = TRUE;
1660                         break;
1661                     case UTF_16LE:
1662                         output_conv = w_oconv16;
1663                         output_endian = ENDIAN_LITTLE;
1664                         break;
1665                     case UTF_16LE_BOM:
1666                         output_conv = w_oconv16;
1667                         output_endian = ENDIAN_LITTLE;
1668                         output_bom_f = TRUE;
1669                         break;
1670                     case UTF_32:
1671                     case UTF_32BE:
1672                         output_conv = w_oconv32;
1673                         break;
1674                     case UTF_32BE_BOM:
1675                         output_conv = w_oconv32;
1676                         output_bom_f = TRUE;
1677                         break;
1678                     case UTF_32LE:
1679                         output_conv = w_oconv32;
1680                         output_endian = ENDIAN_LITTLE;
1681                         break;
1682                     case UTF_32LE_BOM:
1683                         output_conv = w_oconv32;
1684                         output_endian = ENDIAN_LITTLE;
1685                         output_bom_f = TRUE;
1686                         break;
1687 #endif
1688                     default:
1689                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1690                         break;
1691                     }
1692                     continue;
1693                 }
1694                 if (strcmp(long_option[i].name, "guess=") == 0){
1695                     if (p[0] == '1') {
1696                         guess_f = 2;
1697                     } else {
1698                         guess_f = 1;
1699                     }
1700                     continue;
1701                 }
1702 #ifdef OVERWRITE
1703                 if (strcmp(long_option[i].name, "overwrite") == 0){
1704                     file_out_f = TRUE;
1705                     overwrite_f = TRUE;
1706                     preserve_time_f = TRUE;
1707                     continue;
1708                 }
1709                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1710                     file_out_f = TRUE;
1711                     overwrite_f = TRUE;
1712                     preserve_time_f = TRUE;
1713                     backup_f = TRUE;
1714                     backup_suffix = malloc(strlen((char *) p) + 1);
1715                     strcpy(backup_suffix, (char *) p);
1716                     continue;
1717                 }
1718                 if (strcmp(long_option[i].name, "in-place") == 0){
1719                     file_out_f = TRUE;
1720                     overwrite_f = TRUE;
1721                     preserve_time_f = FALSE;
1722                     continue;
1723                 }
1724                 if (strcmp(long_option[i].name, "in-place=") == 0){
1725                     file_out_f = TRUE;
1726                     overwrite_f = TRUE;
1727                     preserve_time_f = FALSE;
1728                     backup_f = TRUE;
1729                     backup_suffix = malloc(strlen((char *) p) + 1);
1730                     strcpy(backup_suffix, (char *) p);
1731                     continue;
1732                 }
1733 #endif
1734 #ifdef INPUT_OPTION
1735                 if (strcmp(long_option[i].name, "cap-input") == 0){
1736                     cap_f = TRUE;
1737                     continue;
1738                 }
1739                 if (strcmp(long_option[i].name, "url-input") == 0){
1740                     url_f = TRUE;
1741                     continue;
1742                 }
1743 #endif
1744 #ifdef NUMCHAR_OPTION
1745                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1746                     numchar_f = TRUE;
1747                     continue;
1748                 }
1749 #endif
1750 #ifdef CHECK_OPTION
1751                 if (strcmp(long_option[i].name, "no-output") == 0){
1752                     noout_f = TRUE;
1753                     continue;
1754                 }
1755                 if (strcmp(long_option[i].name, "debug") == 0){
1756                     debug_f = TRUE;
1757                     continue;
1758                 }
1759 #endif
1760                 if (strcmp(long_option[i].name, "cp932") == 0){
1761 #ifdef SHIFTJIS_CP932
1762                     cp51932_f = TRUE;
1763                     cp932inv_f = TRUE;
1764 #endif
1765 #ifdef UTF8_OUTPUT_ENABLE
1766                     ms_ucs_map_f = UCS_MAP_CP932;
1767 #endif
1768                     continue;
1769                 }
1770                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1771 #ifdef SHIFTJIS_CP932
1772                     cp51932_f = FALSE;
1773                     cp932inv_f = FALSE;
1774 #endif
1775 #ifdef UTF8_OUTPUT_ENABLE
1776                     ms_ucs_map_f = UCS_MAP_ASCII;
1777 #endif
1778                     continue;
1779                 }
1780 #ifdef SHIFTJIS_CP932
1781                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1782                     cp932inv_f = TRUE;
1783                     continue;
1784                 }
1785 #endif
1786
1787 #ifdef X0212_ENABLE
1788                 if (strcmp(long_option[i].name, "x0212") == 0){
1789                     x0212_f = TRUE;
1790                     continue;
1791                 }
1792 #endif
1793
1794 #ifdef EXEC_IO
1795                   if (strcmp(long_option[i].name, "exec-in") == 0){
1796                       exec_f = 1;
1797                       return;
1798                   }
1799                   if (strcmp(long_option[i].name, "exec-out") == 0){
1800                       exec_f = -1;
1801                       return;
1802                   }
1803 #endif
1804 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1805                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1806                     no_cp932ext_f = TRUE;
1807                     continue;
1808                 }
1809                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1810                     no_best_fit_chars_f = TRUE;
1811                     continue;
1812                 }
1813                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1814                     encode_fallback = NULL;
1815                     continue;
1816                 }
1817                 if (strcmp(long_option[i].name, "fb-html") == 0){
1818                     encode_fallback = encode_fallback_html;
1819                     continue;
1820                 }
1821                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1822                     encode_fallback = encode_fallback_xml;
1823                     continue;
1824                 }
1825                 if (strcmp(long_option[i].name, "fb-java") == 0){
1826                     encode_fallback = encode_fallback_java;
1827                     continue;
1828                 }
1829                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1830                     encode_fallback = encode_fallback_perl;
1831                     continue;
1832                 }
1833                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1834                     encode_fallback = encode_fallback_subchar;
1835                     continue;
1836                 }
1837                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1838                     encode_fallback = encode_fallback_subchar;
1839                     unicode_subchar = 0;
1840                     if (p[0] != '0'){
1841                         /* decimal number */
1842                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1843                             unicode_subchar *= 10;
1844                             unicode_subchar += hex2bin(p[i]);
1845                         }
1846                     }else if(p[1] == 'x' || p[1] == 'X'){
1847                         /* hexadecimal number */
1848                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1849                             unicode_subchar <<= 4;
1850                             unicode_subchar |= hex2bin(p[i]);
1851                         }
1852                     }else{
1853                         /* octal number */
1854                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1855                             unicode_subchar *= 8;
1856                             unicode_subchar += hex2bin(p[i]);
1857                         }
1858                     }
1859                     w16e_conv(unicode_subchar, &i, &j);
1860                     unicode_subchar = i<<8 | j;
1861                     continue;
1862                 }
1863 #endif
1864 #ifdef UTF8_OUTPUT_ENABLE
1865                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1866                     ms_ucs_map_f = UCS_MAP_MS;
1867                     continue;
1868                 }
1869 #endif
1870 #ifdef UNICODE_NORMALIZATION
1871                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1872                     input_f = UTF8_INPUT;
1873                     nfc_f = TRUE;
1874                     continue;
1875                 }
1876 #endif
1877                 if (strcmp(long_option[i].name, "prefix=") == 0){
1878                     if (nkf_isgraph(p[0])){
1879                         for (i = 1; nkf_isgraph(p[i]); i++){
1880                             prefix_table[p[i]] = p[0];
1881                         }
1882                     }
1883                     continue;
1884                 }
1885             }
1886             continue;
1887         case 'b':           /* buffered mode */
1888             unbuf_f = FALSE;
1889             continue;
1890         case 'u':           /* non bufferd mode */
1891             unbuf_f = TRUE;
1892             continue;
1893         case 't':           /* transparent mode */
1894             if (*cp=='1') {
1895                 /* alias of -t */
1896                 nop_f = TRUE;
1897                 *cp += 1;
1898             } else if (*cp=='2') {
1899                 /*
1900                  * -t with put/get
1901                  *
1902                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1903                  *
1904                  */
1905                 nop_f = 2;
1906                 *cp += 1;
1907             } else
1908                 nop_f = TRUE;
1909             continue;
1910         case 'j':           /* JIS output */
1911         case 'n':
1912             output_conv = j_oconv;
1913             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1914             continue;
1915         case 'e':           /* AT&T EUC output */
1916             output_conv = e_oconv;
1917             cp932inv_f = FALSE;
1918             output_encoding = nkf_enc_from_index(EUC_JP);
1919             continue;
1920         case 's':           /* SJIS output */
1921             output_conv = s_oconv;
1922             output_encoding = nkf_enc_from_index(SHIFT_JIS);
1923             continue;
1924         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1925             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1926             input_f = LATIN1_INPUT;
1927             continue;
1928         case 'i':           /* Kanji IN ESC-$-@/B */
1929             if (*cp=='@'||*cp=='B')
1930                 kanji_intro = *cp++;
1931             continue;
1932         case 'o':           /* ASCII IN ESC-(-J/B */
1933             if (*cp=='J'||*cp=='B'||*cp=='H')
1934                 ascii_intro = *cp++;
1935             continue;
1936         case 'h':
1937             /*
1938                 bit:1   katakana->hiragana
1939                 bit:2   hiragana->katakana
1940             */
1941             if ('9'>= *cp && *cp>='0')
1942                 hira_f |= (*cp++ -'0');
1943             else
1944                 hira_f |= 1;
1945             continue;
1946         case 'r':
1947             rot_f = TRUE;
1948             continue;
1949 #if defined(MSDOS) || defined(__OS2__)
1950         case 'T':
1951             binmode_f = FALSE;
1952             continue;
1953 #endif
1954 #ifndef PERL_XS
1955         case 'V':
1956             show_configuration();
1957             exit(1);
1958             break;
1959         case 'v':
1960             usage();
1961             exit(1);
1962             break;
1963 #endif
1964 #ifdef UTF8_OUTPUT_ENABLE
1965         case 'w':           /* UTF-8 output */
1966             if (cp[0] == '8') {
1967                 output_conv = w_oconv; cp++;
1968                 if (cp[0] == '0'){
1969                     cp++;
1970                     output_encoding = nkf_enc_from_index(UTF_8N);
1971                 } else {
1972                     output_bom_f = TRUE;
1973                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1974                 }
1975             } else {
1976                 int enc_idx;
1977                 if ('1'== cp[0] && '6'==cp[1]) {
1978                     output_conv = w_oconv16; cp+=2;
1979                     enc_idx = UTF_16;
1980                 } else if ('3'== cp[0] && '2'==cp[1]) {
1981                     output_conv = w_oconv32; cp+=2;
1982                     enc_idx = UTF_32;
1983                 } else {
1984                     output_conv = w_oconv;
1985                     output_encoding = nkf_enc_from_index(UTF_8);
1986                     continue;
1987                 }
1988                 if (cp[0]=='L') {
1989                     cp++;
1990                     output_endian = ENDIAN_LITTLE;
1991                 } else if (cp[0] == 'B') {
1992                     cp++;
1993                 } else {
1994                     output_encoding = nkf_enc_from_index(enc_idx);
1995                     continue;
1996                 }
1997                 if (cp[0] == '0'){
1998                     cp++;
1999                     enc_idx = enc_idx == UTF_16
2000                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
2001                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
2002                 } else {
2003                     output_bom_f = TRUE;
2004                     enc_idx = enc_idx == UTF_16
2005                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
2006                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
2007                 }
2008                 output_encoding = nkf_enc_from_index(enc_idx);
2009             }
2010             continue;
2011 #endif
2012 #ifdef UTF8_INPUT_ENABLE
2013         case 'W':           /* UTF input */
2014             if (cp[0] == '8') {
2015                 cp++;
2016                 input_f = UTF8_INPUT;
2017             }else{
2018                 if ('1'== cp[0] && '6'==cp[1]) {
2019                     cp += 2;
2020                     input_f = UTF16_INPUT;
2021                     input_endian = ENDIAN_BIG;
2022                 } else if ('3'== cp[0] && '2'==cp[1]) {
2023                     cp += 2;
2024                     input_f = UTF32_INPUT;
2025                     input_endian = ENDIAN_BIG;
2026                 } else {
2027                     input_f = UTF8_INPUT;
2028                     continue;
2029                 }
2030                 if (cp[0]=='L') {
2031                     cp++;
2032                     input_endian = ENDIAN_LITTLE;
2033                 } else if (cp[0] == 'B') {
2034                     cp++;
2035                 }
2036             }
2037             continue;
2038 #endif
2039         /* Input code assumption */
2040         case 'J':   /* JIS input */
2041             input_f = JIS_INPUT;
2042             continue;
2043         case 'E':   /* AT&T EUC input */
2044             input_f = EUC_INPUT;
2045             continue;
2046         case 'S':   /* MS Kanji input */
2047             input_f = SJIS_INPUT;
2048             continue;
2049         case 'Z':   /* Convert X0208 alphabet to asii */
2050             /* alpha_f
2051                bit:0   Convert JIS X 0208 Alphabet to ASCII
2052                bit:1   Convert Kankaku to one space
2053                bit:2   Convert Kankaku to two spaces
2054                bit:3   Convert HTML Entity
2055                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2056             */
2057             while ('0'<= *cp && *cp <='9') {
2058                 alpha_f |= 1 << (*cp++ - '0');
2059             }
2060             if (!alpha_f) alpha_f = 1;
2061             continue;
2062         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2063             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2064             /* accept  X0201
2065                     ESC-(-I     in JIS, EUC, MS Kanji
2066                     SI/SO       in JIS, EUC, MS Kanji
2067                     SSO         in EUC, JIS, not in MS Kanji
2068                     MS Kanji (0xa0-0xdf)
2069                output  X0201
2070                     ESC-(-I     in JIS (0x20-0x5f)
2071                     SSO         in EUC (0xa0-0xdf)
2072                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2073             */
2074             continue;
2075         case 'X':   /* Convert X0201 kana to X0208 */
2076             x0201_f = TRUE;
2077             continue;
2078         case 'F':   /* prserve new lines */
2079             fold_preserve_f = TRUE;
2080         case 'f':   /* folding -f60 or -f */
2081             fold_f = TRUE;
2082             fold_len = 0;
2083             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2084                 fold_len *= 10;
2085                 fold_len += *cp++ - '0';
2086             }
2087             if (!(0<fold_len && fold_len<BUFSIZ))
2088                 fold_len = DEFAULT_FOLD;
2089             if (*cp=='-') {
2090                 fold_margin = 0;
2091                 cp++;
2092                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2093                     fold_margin *= 10;
2094                     fold_margin += *cp++ - '0';
2095                 }
2096             }
2097             continue;
2098         case 'm':   /* MIME support */
2099             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2100             if (*cp=='B'||*cp=='Q') {
2101                 mime_decode_mode = *cp++;
2102                 mimebuf_f = FIXED_MIME;
2103             } else if (*cp=='N') {
2104                 mime_f = TRUE; cp++;
2105             } else if (*cp=='S') {
2106                 mime_f = STRICT_MIME; cp++;
2107             } else if (*cp=='0') {
2108                 mime_decode_f = FALSE;
2109                 mime_f = FALSE; cp++;
2110             }
2111             continue;
2112         case 'M':   /* MIME output */
2113             if (*cp=='B') {
2114                 mimeout_mode = 'B';
2115                 mimeout_f = FIXED_MIME; cp++;
2116             } else if (*cp=='Q') {
2117                 mimeout_mode = 'Q';
2118                 mimeout_f = FIXED_MIME; cp++;
2119             } else {
2120                 mimeout_f = TRUE;
2121             }
2122             continue;
2123         case 'B':   /* Broken JIS support */
2124             /*  bit:0   no ESC JIS
2125                 bit:1   allow any x on ESC-(-x or ESC-$-x
2126                 bit:2   reset to ascii on NL
2127             */
2128             if ('9'>= *cp && *cp>='0')
2129                 broken_f |= 1<<(*cp++ -'0');
2130             else
2131                 broken_f |= TRUE;
2132             continue;
2133 #ifndef PERL_XS
2134         case 'O':/* for Output file */
2135             file_out_f = TRUE;
2136             continue;
2137 #endif
2138         case 'c':/* add cr code */
2139             nlmode_f = CRLF;
2140             continue;
2141         case 'd':/* delete cr code */
2142             nlmode_f = LF;
2143             continue;
2144         case 'I':   /* ISO-2022-JP output */
2145             iso2022jp_f = TRUE;
2146             continue;
2147         case 'L':  /* line mode */
2148             if (*cp=='u') {         /* unix */
2149                 nlmode_f = LF; cp++;
2150             } else if (*cp=='m') { /* mac */
2151                 nlmode_f = CR; cp++;
2152             } else if (*cp=='w') { /* windows */
2153                 nlmode_f = CRLF; cp++;
2154             } else if (*cp=='0') { /* no conversion  */
2155                 nlmode_f = 0; cp++;
2156             }
2157             continue;
2158 #ifndef PERL_XS
2159         case 'g':
2160             if (*cp == '1') {
2161                 guess_f = 2;
2162                 cp++;
2163             } else if (*cp == '0') {
2164                 guess_f = 1;
2165                 cp++;
2166             } else {
2167                 guess_f = 1;
2168             }
2169             continue;
2170 #endif
2171         case SP:
2172         /* module muliple options in a string are allowed for Perl moudle  */
2173             while(*cp && *cp++!='-');
2174             continue;
2175         default:
2176             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2177             /* bogus option but ignored */
2178             continue;
2179         }
2180     }
2181 }
2182
2183 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2184 {
2185     if (iconv_func){
2186         struct input_code *p = input_code_list;
2187         while (p->name){
2188             if (iconv_func == p->iconv_func){
2189                 return p;
2190             }
2191             p++;
2192         }
2193     }
2194     return 0;
2195 }
2196
2197 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2198 {
2199 #ifdef INPUT_CODE_FIX
2200     if (f || !input_f)
2201 #endif
2202         if (estab_f != f){
2203             estab_f = f;
2204         }
2205
2206     if (iconv_func
2207 #ifdef INPUT_CODE_FIX
2208         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
2209 #endif
2210         ){
2211         iconv = iconv_func;
2212     }
2213 #ifdef CHECK_OPTION
2214     if (estab_f && iconv_for_check != iconv){
2215         struct input_code *p = find_inputcode_byfunc(iconv);
2216         if (p){
2217             set_input_codename(p->name);
2218             debug(p->name);
2219         }
2220         iconv_for_check = iconv;
2221     }
2222 #endif
2223 }
2224
/*
 * Score bits.  Each macro is a distinct single bit; set_code_score() ORs
 * them into input_code.score and clr_code_score() clears them.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* reinterpreted via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* character that does not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score for a lead byte whose (c2 & 0x70) is 0x20, indexed by (c2 & 0x0f).
 *
 * The element type is unsigned char, not plain char: SCORE_ERROR is 128,
 * which does not fit in a signed char.  On platforms where char is signed
 * it would be stored as a negative value and sign-extended when passed to
 * set_code_score(), setting every high bit of the score.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score for a lead byte whose (c2 & 0x70) is 0x70, indexed by (c2 & 0x0f).
 * Unsigned for the same reason as score_table_A0 (last entry is SCORE_ERROR).
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2249
2250 void set_code_score(struct input_code *ptr, nkf_char score)
2251 {
2252     if (ptr){
2253         ptr->score |= score;
2254     }
2255 }
2256
2257 void clr_code_score(struct input_code *ptr, nkf_char score)
2258 {
2259     if (ptr){
2260         ptr->score &= ~score;
2261     }
2262 }
2263
2264 void code_score(struct input_code *ptr)
2265 {
2266     nkf_char c2 = ptr->buf[0];
2267 #ifdef UTF8_OUTPUT_ENABLE
2268     nkf_char c1 = ptr->buf[1];
2269 #endif
2270     if (c2 < 0){
2271         set_code_score(ptr, SCORE_ERROR);
2272     }else if (c2 == SSO){
2273         set_code_score(ptr, SCORE_KANA);
2274     }else if (c2 == 0x8f){
2275         set_code_score(ptr, SCORE_X0212);
2276 #ifdef UTF8_OUTPUT_ENABLE
2277     }else if (!e2w_conv(c2, c1)){
2278         set_code_score(ptr, SCORE_NO_EXIST);
2279 #endif
2280     }else if ((c2 & 0x70) == 0x20){
2281         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2282     }else if ((c2 & 0x70) == 0x70){
2283         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2284     }else if ((c2 & 0x70) >= 0x50){
2285         set_code_score(ptr, SCORE_L2);
2286     }
2287 }
2288
2289 void status_disable(struct input_code *ptr)
2290 {
2291     ptr->stat = -1;
2292     ptr->buf[0] = -1;
2293     code_score(ptr);
2294     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2295 }
2296
2297 void status_push_ch(struct input_code *ptr, nkf_char c)
2298 {
2299     ptr->buf[ptr->index++] = c;
2300 }
2301
2302 void status_clear(struct input_code *ptr)
2303 {
2304     ptr->stat = 0;
2305     ptr->index = 0;
2306 }
2307
2308 void status_reset(struct input_code *ptr)
2309 {
2310     status_clear(ptr);
2311     ptr->score = SCORE_INIT;
2312 }
2313
2314 void status_reinit(struct input_code *ptr)
2315 {
2316     status_reset(ptr);
2317     ptr->_file_stat = 0;
2318 }
2319
2320 void status_check(struct input_code *ptr, nkf_char c)
2321 {
2322     if (c <= DEL && estab_f){
2323         status_reset(ptr);
2324     }
2325 }
2326
2327 void s_status(struct input_code *ptr, nkf_char c)
2328 {
2329     switch(ptr->stat){
2330       case -1:
2331           status_check(ptr, c);
2332           break;
2333       case 0:
2334           if (c <= DEL){
2335               break;
2336 #ifdef NUMCHAR_OPTION
2337           }else if (is_unicode_capsule(c)){
2338               break;
2339 #endif
2340           }else if (0xa1 <= c && c <= 0xdf){
2341               status_push_ch(ptr, SSO);
2342               status_push_ch(ptr, c);
2343               code_score(ptr);
2344               status_clear(ptr);
2345           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2346               ptr->stat = 1;
2347               status_push_ch(ptr, c);
2348           }else if (0xed <= c && c <= 0xee){
2349               ptr->stat = 3;
2350               status_push_ch(ptr, c);
2351 #ifdef SHIFTJIS_CP932
2352           }else if (is_ibmext_in_sjis(c)){
2353               ptr->stat = 2;
2354               status_push_ch(ptr, c);
2355 #endif /* SHIFTJIS_CP932 */
2356 #ifdef X0212_ENABLE
2357           }else if (0xf0 <= c && c <= 0xfc){
2358               ptr->stat = 1;
2359               status_push_ch(ptr, c);
2360 #endif /* X0212_ENABLE */
2361           }else{
2362               status_disable(ptr);
2363           }
2364           break;
2365       case 1:
2366           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2367               status_push_ch(ptr, c);
2368               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2369               code_score(ptr);
2370               status_clear(ptr);
2371           }else{
2372               status_disable(ptr);
2373           }
2374           break;
2375       case 2:
2376 #ifdef SHIFTJIS_CP932
2377         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2378             status_push_ch(ptr, c);
2379             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2380                 set_code_score(ptr, SCORE_CP932);
2381                 status_clear(ptr);
2382                 break;
2383             }
2384         }
2385 #endif /* SHIFTJIS_CP932 */
2386         status_disable(ptr);
2387           break;
2388       case 3:
2389           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2390               status_push_ch(ptr, c);
2391               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2392             set_code_score(ptr, SCORE_CP932);
2393             status_clear(ptr);
2394           }else{
2395               status_disable(ptr);
2396           }
2397           break;
2398     }
2399 }
2400
2401 void e_status(struct input_code *ptr, nkf_char c)
2402 {
2403     switch (ptr->stat){
2404       case -1:
2405           status_check(ptr, c);
2406           break;
2407       case 0:
2408           if (c <= DEL){
2409               break;
2410 #ifdef NUMCHAR_OPTION
2411           }else if (is_unicode_capsule(c)){
2412               break;
2413 #endif
2414           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2415               ptr->stat = 1;
2416               status_push_ch(ptr, c);
2417 #ifdef X0212_ENABLE
2418           }else if (0x8f == c){
2419               ptr->stat = 2;
2420               status_push_ch(ptr, c);
2421 #endif /* X0212_ENABLE */
2422           }else{
2423               status_disable(ptr);
2424           }
2425           break;
2426       case 1:
2427           if (0xa1 <= c && c <= 0xfe){
2428               status_push_ch(ptr, c);
2429               code_score(ptr);
2430               status_clear(ptr);
2431           }else{
2432               status_disable(ptr);
2433           }
2434           break;
2435 #ifdef X0212_ENABLE
2436       case 2:
2437           if (0xa1 <= c && c <= 0xfe){
2438               ptr->stat = 1;
2439               status_push_ch(ptr, c);
2440           }else{
2441               status_disable(ptr);
2442           }
2443 #endif /* X0212_ENABLE */
2444     }
2445 }
2446
#ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte to the UTF-8 candidate's state machine.
 *
 * ptr->stat values handled here:
 *   -1  disabled; only status_check() is consulted
 *    0  expecting a first byte: 0xc0-0xdf -> state 1, 0xe0-0xef -> state 2,
 *       0xf0-0xf4 -> state 3
 *    1,2  collecting 0x80-0xbf bytes; once more than `stat` bytes are
 *       buffered the sequence is converted with w2e_conv() and scored,
 *       except that the byte triple EF BB BF is not scored
 *    3  collecting 0x80-0xbf bytes; the sequence is dropped with
 *       status_clear() without conversion or scoring
 *
 * A byte that is not acceptable in the current state disables the
 * candidate through status_disable().
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    /* Range accepted for every byte after the first. */
    const int is_cont = (0x80 <= c && c <= 0xbf);
    int bom;

    switch (ptr->stat){
    case -1:
        status_check(ptr, c);
        return;

    case 0:
        if (c <= DEL)
            return;
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c))
            return;
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
        }else{
            status_disable(ptr);
            return;
        }
        status_push_ch(ptr, c);
        return;

    case 1:
    case 2:
        if (!is_cont){
            status_disable(ptr);
            return;
        }
        status_push_ch(ptr, c);
        if (ptr->index <= ptr->stat)
            return;             /* sequence not complete yet */

        /* Remember whether this was EF BB BF before buf is overwritten. */
        bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
               && ptr->buf[2] == 0xbf);
        w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                 &ptr->buf[0], &ptr->buf[1]);
        if (!bom){
            code_score(ptr);
        }
        status_clear(ptr);
        return;

    case 3:
        if (!is_cont){
            status_disable(ptr);
            return;
        }
        if (ptr->index < ptr->stat){
            status_push_ch(ptr, c);
        }else{
            status_clear(ptr);
        }
        return;
    }
}
#endif
2506
2507 void code_status(nkf_char c)
2508 {
2509     int action_flag = 1;
2510     struct input_code *result = 0;
2511     struct input_code *p = input_code_list;
2512     while (p->name){
2513         if (!p->status_func) {
2514             ++p;
2515             continue;
2516         }
2517         if (!p->status_func)
2518             continue;
2519         (p->status_func)(p, c);
2520         if (p->stat > 0){
2521             action_flag = 0;
2522         }else if(p->stat == 0){
2523             if (result){
2524                 action_flag = 0;
2525             }else{
2526                 result = p;
2527             }
2528         }
2529         ++p;
2530     }
2531
2532     if (action_flag){
2533         if (result && !estab_f){
2534             set_iconv(TRUE, result->iconv_func);
2535         }else if (c <= DEL){
2536             struct input_code *ptr = input_code_list;
2537             while (ptr->name){
2538                 status_reset(ptr);
2539                 ++ptr;
2540             }
2541         }
2542     }
2543 }
2544
2545 #ifndef WIN32DLL
2546 nkf_char std_getc(FILE *f)
2547 {
2548     if (std_gc_ndx){
2549         return std_gc_buf[--std_gc_ndx];
2550     }
2551     return getc(f);
2552 }
2553 #endif /*WIN32DLL*/
2554
2555 nkf_char std_ungetc(nkf_char c, FILE *f)
2556 {
2557     if (std_gc_ndx == STD_GC_BUFSIZE){
2558         return EOF;
2559     }
2560     std_gc_buf[std_gc_ndx++] = c;
2561     return c;
2562 }
2563
2564 #ifndef WIN32DLL
2565 void std_putc(nkf_char c)
2566 {
2567     if(c!=EOF)
2568       putchar(c);
2569 }
2570 #endif /*WIN32DLL*/
2571
2572 #if !defined(PERL_XS) && !defined(WIN32DLL)
2573 nkf_char noconvert(FILE *f)
2574 {
2575     nkf_char    c;
2576
2577     if (nop_f == 2)
2578         module_connection();
2579     while ((c = (*i_getc)(f)) != EOF)
2580       (*o_putc)(c);
2581     (*o_putc)(EOF);
2582     return 1;
2583 }
2584 #endif
2585
/*
 * Build the output and input function chains from the option flags.
 *
 * Output side: oconv starts as output_conv and o_putc as std_putc.  Each
 * enabled option saves the current head of the chain in its own o_*
 * pointer and installs its filter as the new head, so the filter
 * installed last here is the first one reached through oconv.
 *
 * Input side: i_getc / i_ungetc start as std_getc / std_ungetc and are
 * wrapped the same way by the enabled input filters.
 *
 * Finally the input converter is selected with set_iconv() according to
 * input_f, and status_reinit() is applied to every entry of
 * input_code_list.
 */
2586 void module_connection(void)
2587 {
2588     oconv = output_conv;
2589     o_putc = std_putc;
2590
2591     /* replace continuation module, from output side */
2592
2593     /* output redirection */
2594 #ifdef CHECK_OPTION
2595     if (noout_f || guess_f){
2596         o_putc = no_putc;
2597     }
2598 #endif
2599     if (mimeout_f) {
2600         o_mputc = o_putc;
2601         o_putc = mime_putc;
2602         if (mimeout_f == TRUE) {
2603             o_base64conv = oconv; oconv = base64_conv;
2604         }
2605         /* base64_count = 0; */
2606     }
2607
     /* Each block below pushes one more filter in front of oconv; the
      * order of these blocks therefore fixes the order of processing. */
2608     if (nlmode_f || guess_f) {
2609         o_nlconv = oconv; oconv = nl_conv;
2610     }
2611     if (rot_f) {
2612         o_rot_conv = oconv; oconv = rot_conv;
2613     }
2614     if (iso2022jp_f) {
2615         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2616     }
2617     if (hira_f) {
2618         o_hira_conv = oconv; oconv = hira_conv;
2619     }
2620     if (fold_f) {
2621         o_fconv = oconv; oconv = fold_conv;
2622         f_line = 0;
2623     }
2624     if (alpha_f || x0201_f) {
2625         o_zconv = oconv; oconv = z_conv;
2626     }
2627
2628     i_getc = std_getc;
2629     i_ungetc = std_ungetc;
2630     /* input redirection */
2631 #ifdef INPUT_OPTION
2632     if (cap_f){
2633         i_cgetc = i_getc; i_getc = cap_getc;
2634         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2635     }
2636     if (url_f){
2637         i_ugetc = i_getc; i_getc = url_getc;
2638         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2639     }
2640 #endif
2641 #ifdef NUMCHAR_OPTION
2642     if (numchar_f){
2643         i_ngetc = i_getc; i_getc = numchar_getc;
2644         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2645     }
2646 #endif
2647 #ifdef UNICODE_NORMALIZATION
2648     if (nfc_f && input_f == UTF8_INPUT){
2649         i_nfc_getc = i_getc; i_getc = nfc_getc;
2650         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2651     }
2652 #endif
2653     if (mime_f && mimebuf_f==FIXED_MIME) {
2654         i_mgetc = i_getc; i_getc = mime_getc;
2655         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2656     }
2657     if (broken_f & 1) {
2658         i_bgetc = i_getc; i_getc = broken_getc;
2659         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2660     }
     /* Select the input converter.  An explicitly requested input_f passes
      * -TRUE as the first argument, the fallback passes FALSE.
      * NOTE(review): presumably -TRUE marks the encoding as forced rather
      * than detected -- confirm against set_iconv(). */
2661     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2662         set_iconv(-TRUE, e_iconv);
2663     } else if (input_f == SJIS_INPUT) {
2664         set_iconv(-TRUE, s_iconv);
2665 #ifdef UTF8_INPUT_ENABLE
2666     } else if (input_f == UTF8_INPUT) {
2667         set_iconv(-TRUE, w_iconv);
2668     } else if (input_f == UTF16_INPUT) {
2669         set_iconv(-TRUE, w_iconv16);
2670     } else if (input_f == UTF32_INPUT) {
2671         set_iconv(-TRUE, w_iconv32);
2672 #endif
2673     } else {
2674         set_iconv(FALSE, e_iconv);
2675     }
2676
     /* Re-initialise the state of every candidate input encoding. */
2677     {
2678         struct input_code *p = input_code_list;
2679         while (p->name){
2680             status_reinit(p++);
2681         }
2682     }
2683 }
2684
2685 /*
2686  * Check and Ignore BOM
2687  */
2688 void check_bom(FILE *f)
2689 {
2690     int c2;
2691     switch(c2 = (*i_getc)(f)){
2692     case 0x00:
2693         if((c2 = (*i_getc)(f)) == 0x00){
2694             if((c2 = (*i_getc)(f)) == 0xFE){
2695                 if((c2 = (*i_getc)(f)) == 0xFF){
2696                     if(!input_f){
2697                         set_iconv(TRUE, w_iconv32);
2698                     }
2699                     if (iconv == w_iconv32) {
2700                         input_endian = ENDIAN_BIG;
2701                         return;
2702                     }
2703                     (*i_ungetc)(0xFF,f);
2704                 }else (*i_ungetc)(c2,f);
2705                 (*i_ungetc)(0xFE,f);
2706             }else if(c2 == 0xFF){
2707                 if((c2 = (*i_getc)(f)) == 0xFE){
2708                     if(!input_f){
2709                         set_iconv(TRUE, w_iconv32);
2710                     }
2711                     if (iconv == w_iconv32) {
2712                         input_endian = ENDIAN_2143;
2713                         return;
2714                     }
2715                     (*i_ungetc)(0xFF,f);
2716                 }else (*i_ungetc)(c2,f);
2717                 (*i_ungetc)(0xFF,f);
2718             }else (*i_ungetc)(c2,f);
2719             (*i_ungetc)(0x00,f);
2720         }else (*i_ungetc)(c2,f);
2721         (*i_ungetc)(0x00,f);
2722         break;
2723     case 0xEF:
2724         if((c2 = (*i_getc)(f)) == 0xBB){
2725             if((c2 = (*i_getc)(f)) == 0xBF){
2726                 if(!input_f){
2727                     set_iconv(TRUE, w_iconv);
2728                 }
2729                 if (iconv == w_iconv) {
2730                     return;
2731                 }
2732                 (*i_ungetc)(0xBF,f);
2733             }else (*i_ungetc)(c2,f);
2734             (*i_ungetc)(0xBB,f);
2735         }else (*i_ungetc)(c2,f);
2736         (*i_ungetc)(0xEF,f);
2737         break;
2738     case 0xFE:
2739         if((c2 = (*i_getc)(f)) == 0xFF){
2740             if((c2 = (*i_getc)(f)) == 0x00){
2741                 if((c2 = (*i_getc)(f)) == 0x00){
2742                     if(!input_f){
2743                         set_iconv(TRUE, w_iconv32);
2744                     }
2745                     if (iconv == w_iconv32) {
2746                         input_endian = ENDIAN_3412;
2747                         return;
2748                     }
2749                     (*i_ungetc)(0x00,f);
2750                 }else (*i_ungetc)(c2,f);
2751                 (*i_ungetc)(0x00,f);
2752             }else (*i_ungetc)(c2,f);
2753             if(!input_f){
2754                 set_iconv(TRUE, w_iconv16);
2755             }
2756             if (iconv == w_iconv16) {
2757                 input_endian = ENDIAN_BIG;
2758                 return;
2759             }
2760             (*i_ungetc)(0xFF,f);
2761         }else (*i_ungetc)(c2,f);
2762         (*i_ungetc)(0xFE,f);
2763         break;
2764     case 0xFF:
2765         if((c2 = (*i_getc)(f)) == 0xFE){
2766             if((c2 = (*i_getc)(f)) == 0x00){
2767                 if((c2 = (*i_getc)(f)) == 0x00){
2768                     if(!input_f){
2769                         set_iconv(TRUE, w_iconv32);
2770                     }
2771                     if (iconv == w_iconv32) {
2772                         input_endian = ENDIAN_LITTLE;
2773                         return;
2774                     }
2775                     (*i_ungetc)(0x00,f);
2776                 }else (*i_ungetc)(c2,f);
2777                 (*i_ungetc)(0x00,f);
2778             }else (*i_ungetc)(c2,f);
2779             if(!input_f){
2780                 set_iconv(TRUE, w_iconv16);
2781             }
2782             if (iconv == w_iconv16) {
2783                 input_endian = ENDIAN_LITTLE;
2784                 return;
2785             }
2786             (*i_ungetc)(0xFE,f);
2787         }else (*i_ungetc)(c2,f);
2788         (*i_ungetc)(0xFF,f);
2789         break;
2790     default:
2791         (*i_ungetc)(c2,f);
2792         break;
2793     }
2794 }
2795
2796 /*
2797    Conversion main loop. Code detection only.
2798  */
2799
/*
 * kanji_convert: read the whole of f through i_getc and feed it to the
 * converter chain.
 *
 * Per iteration one unit c1 is read.  While no input encoding is forced
 * (or always, when INPUT_CODE_FIX is not defined) it is also passed to
 * code_status().  The loop body only decides what (c2, c1, c0) should be:
 *   NEXT  - nothing to emit yet, read the next unit;
 *   SEND  - fall through to the "send" switch after the if/else chain,
 *           which hands (c2, c1, c0) to iconv or oconv depending on
 *           input_mode;
 *   LAST  - leave the loop and run the epilogue.
 * A non-zero c2 means that the first byte of a multi-byte character is
 * pending and c1 is its second byte.
 *
 * After the loop iconv is called once with EOF, and if no input code name
 * has been recorded and is_8bit is set, the entry of input_code_list with
 * the lowest score is recorded as the input code name.
 *
 * Always returns 1, except for the early "return 0" noted below.
 */
2800 nkf_char kanji_convert(FILE *f)
2801 {
2802     nkf_char    c3, c2=0, c1, c0=0;
2803     int is_8bit = FALSE;
2804
     /* is_8bit: an 8-bit input encoding was requested explicitly.  It
      * disables the SI/SO/ESC handling further down (unless a MIME part is
      * being decoded) and enables the score-based naming in the epilogue. */
2805     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2806 #ifdef UTF8_INPUT_ENABLE
2807        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2808 #endif
2809       ){
2810         is_8bit = TRUE;
2811     }
2812
2813     input_mode = ASCII;
2814     output_mode = ASCII;
2815     shift_mode = FALSE;
2816
2817 #define NEXT continue      /* no output, get next */
2818 #define SEND ;             /* output c1 and c2, get next */
2819 #define LAST break         /* end of loop, go closing  */
2820
2821     module_connection();
2822     check_bom(f);
2823
2824     while ((c1 = (*i_getc)(f)) != EOF) {
2825 #ifdef INPUT_CODE_FIX
2826         if (!input_f)
2827 #endif
2828             code_status(c1);
2829         if (c2) {
2830             /* second byte */
2831             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2832                 /* in case of 8th bit is on */
2833                 if (!estab_f&&!mime_decode_mode) {
2834                     /* in case of not established yet */
2835                     /* It is still ambiguous */
2836                     if (h_conv(f, c2, c1)==EOF)
2837                         LAST;
2838                     else
2839                         c2 = 0;
2840                     NEXT;
2841                 } else {
2842                     /* in case of already established */
2843                     if (c1 < AT) {
2844                         /* ignore bogus code and not CP5022x UCD */
2845                         c2 = 0;
2846                         NEXT;
2847                     } else {
2848                         SEND;
2849                     }
2850                 }
2851             } else
2852                 /* second byte, 7 bit code */
2853                 /* it might be kanji shifted */
2854                 if ((c1 == DEL) || (c1 <= SP)) {
2855                     /* ignore bogus first code */
2856                     c2 = 0;
2857                     NEXT;
2858                 } else
2859                     SEND;
2860         } else {
2861             /* first byte */
2862 #ifdef UTF8_INPUT_ENABLE
2863             if (iconv == w_iconv16) {
                 /* UTF-16: read the second byte of the code unit and order
                  * (c2, c1) according to input_endian.  When the high byte
                  * is in 0xD8..0xDB two more bytes are read into c0 as a
                  * 16-bit value.  EOF in the middle is signalled to the
                  * converter by c2 = EOF. */
2864                 if (input_endian == ENDIAN_BIG) {
2865                     c2 = c1;
2866                     if ((c1 = (*i_getc)(f)) != EOF) {
2867                         if (0xD8 <= c2 && c2 <= 0xDB) {
2868                             if ((c0 = (*i_getc)(f)) != EOF) {
2869                                 c0 <<= 8;
2870                                 if ((c3 = (*i_getc)(f)) != EOF) {
2871                                     c0 |= c3;
2872                                 } else c2 = EOF;
2873                             } else c2 = EOF;
2874                         }
2875                     } else c2 = EOF;
2876                 } else {
2877                     if ((c2 = (*i_getc)(f)) != EOF) {
2878                         if (0xD8 <= c2 && c2 <= 0xDB) {
2879                             if ((c3 = (*i_getc)(f)) != EOF) {
2880                                 if ((c0 = (*i_getc)(f)) != EOF) {
2881                                     c0 <<= 8;
2882                                     c0 |= c3;
2883                                 } else c2 = EOF;
2884                             } else c2 = EOF;
2885                         }
2886                     } else c2 = EOF;
2887                 }
2888                 SEND;
2889             } else if(iconv == w_iconv32){
                 /* UTF-32: read three more bytes and assemble a code point
                  * in c1 from the byte positions given by input_endian.
                  * Only 24 bits are combined in each case.  This local c3
                  * (the first byte read) shadows the outer c3. */
2890                 int c3 = c1;
2891                 if((c2 = (*i_getc)(f)) != EOF &&
2892                    (c1 = (*i_getc)(f)) != EOF &&
2893                    (c0 = (*i_getc)(f)) != EOF){
2894                     switch(input_endian){
2895                     case ENDIAN_BIG:
2896                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2897                         break;
2898                     case ENDIAN_LITTLE:
2899                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2900                         break;
2901                     case ENDIAN_2143:
2902                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2903                         break;
2904                     case ENDIAN_3412:
2905                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2906                         break;
2907                     }
2908                     c2 = 0;
2909                 }else{
2910                     c2 = EOF;
2911                 }
2912                 SEND;
2913             } else
2914 #endif
2915 #ifdef NUMCHAR_OPTION
2916             if (is_unicode_capsule(c1)){
2917                 SEND;
2918             } else
2919 #endif
2920             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2921                 /* 8 bit code */
2922                 if (!estab_f && !iso8859_f) {
2923                     /* not established yet */
2924                     c2 = c1;
2925                     NEXT;
2926                 } else { /* estab_f==TRUE */
2927                     if (iso8859_f) {
2928                         c2 = ISO_8859_1;
2929                         c1 &= 0x7f;
2930                         SEND;
2931                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2932                         /* SJIS X0201 Case... */
2933                         if (iso2022jp_f && !x0201_f) {
2934                             (*oconv)(GETA1, GETA2);
2935                             NEXT;
2936                         } else {
2937                             c2 = JIS_X_0201;
2938                             c1 &= 0x7f;
2939                             SEND;
2940                         }
2941                     } else if (c1==SSO && iconv != s_iconv) {
2942                         /* EUC X0201 Case */
2943                         c1 = (*i_getc)(f);  /* skip SSO */
2944                         code_status(c1);
2945                         if (SSP<=c1 && c1<0xe0) {
2946                             if (iso2022jp_f && !x0201_f) {
2947                                 (*oconv)(GETA1, GETA2);
2948                                 NEXT;
2949                             } else {
2950                                 c2 = JIS_X_0201;
2951                                 c1 &= 0x7f;
2952                                 SEND;
2953                             }
2954                         } else  { /* bogus code, skip SSO and one byte */
2955                             NEXT;
2956                         }
2957                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2958                                (c1 == 0xFD || c1 == 0xFE)) {
2959                         /* CP10001 */
2960                         c2 = JIS_X_0201;
2961                         c1 &= 0x7f;
2962                         SEND;
2963                     } else {
2964                        /* already established */
2965                        c2 = c1;
2966                        NEXT;
2967                     }
2968                 }
2969             } else if ((c1 > SP) && (c1 != DEL)) {
2970                 /* in case of Roman characters */
2971                 if (shift_mode) {
2972                     /* output 1 shifted byte */
2973                     if (iso8859_f) {
2974                         c2 = ISO_8859_1;
2975                         SEND;
2976                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2977                       /* output 1 shifted byte */
2978                         if (iso2022jp_f && !x0201_f) {
2979                             (*oconv)(GETA1, GETA2);
2980                             NEXT;
2981                         } else {
2982                             c2 = JIS_X_0201;
2983                             SEND;
2984                         }
2985                     } else {
2986                         /* look like bogus code */
2987                         NEXT;
2988                     }
2989                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2990                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2991                     /* in case of Kanji shifted */
2992                     c2 = c1;
2993                     NEXT;
2994                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2995                     /* Check MIME code */
2996                     if ((c1 = (*i_getc)(f)) == EOF) {
2997                         (*oconv)(0, '=');
2998                         LAST;
2999                     } else if (c1 == '?') {
3000                         /* =? is mime conversion start sequence */
3001                         if(mime_f == STRICT_MIME) {
3002                             /* check in real detail */
3003                             if (mime_begin_strict(f) == EOF)
3004                                 LAST;
3005                             else
3006                                 NEXT;
3007                         } else if (mime_begin(f) == EOF)
3008                             LAST;
3009                         else
3010                             NEXT;
3011                     } else {
3012                         (*oconv)(0, '=');
3013                         (*i_ungetc)(c1,f);
3014                         NEXT;
3015                     }
3016                 } else {
3017                     /* normal ASCII code */
3018                     SEND;
3019                 }
3020             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
3021                 shift_mode = FALSE;
3022                 NEXT;
3023             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
3024                 shift_mode = TRUE;
3025                 NEXT;
3026             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                 /* Escape sequence: the bytes after ESC select input_mode
                  * and shift_mode.  Unrecognised sequences are passed on
                  * to oconv byte by byte; a sequence cut off by EOF is
                  * dropped. */
3027                 if ((c1 = (*i_getc)(f)) == EOF) {
3028                     /*  (*oconv)(0, ESC); don't send bogus code */
3029                     LAST;
3030                 } else if (c1 == '$') {
3031                     if ((c1 = (*i_getc)(f)) == EOF) {
3032                         /*
3033                         (*oconv)(0, ESC); don't send bogus code
3034                         (*oconv)(0, '$'); */
3035                         LAST;
3036                     } else if (c1 == '@'|| c1 == 'B') {
3037                         /* This is kanji introduction */
3038                         input_mode = JIS_X_0208;
3039                         shift_mode = FALSE;
3040                         set_input_codename("ISO-2022-JP");
3041 #ifdef CHECK_OPTION
3042                         debug("ISO-2022-JP");
3043 #endif
3044                         NEXT;
3045                     } else if (c1 == '(') {
3046                         if ((c1 = (*i_getc)(f)) == EOF) {
3047                             /* don't send bogus code
3048                             (*oconv)(0, ESC);
3049                             (*oconv)(0, '$');
3050                             (*oconv)(0, '(');
3051                                 */
3052                             LAST;
3053                         } else if (c1 == '@'|| c1 == 'B') {
3054                             /* This is kanji introduction */
3055                             input_mode = JIS_X_0208;
3056                             shift_mode = FALSE;
3057                             NEXT;
3058 #ifdef X0212_ENABLE
3059                         } else if (c1 == 'D'){
3060                             input_mode = JIS_X_0212;
3061                             shift_mode = FALSE;
3062                             NEXT;
3063 #endif /* X0212_ENABLE */
3064                         } else if (c1 == 0x4F){
3065                             input_mode = JIS_X_0213_1;
3066                             shift_mode = FALSE;
3067                             NEXT;
3068                         } else if (c1 == 0x50){
3069                             input_mode = JIS_X_0213_2;
3070                             shift_mode = FALSE;
3071                             NEXT;
3072                         } else {
3073                             /* could be some special code */
3074                             (*oconv)(0, ESC);
3075                             (*oconv)(0, '$');
3076                             (*oconv)(0, '(');
3077                             (*oconv)(0, c1);
3078                             NEXT;
3079                         }
3080                     } else if (broken_f&0x2) {
3081                         /* accept any ESC-(-x as broken code ... */
3082                         input_mode = JIS_X_0208;
3083                         shift_mode = FALSE;
3084                         NEXT;
3085                     } else {
3086                         (*oconv)(0, ESC);
3087                         (*oconv)(0, '$');
3088                         (*oconv)(0, c1);
3089                         NEXT;
3090                     }
3091                 } else if (c1 == '(') {
3092                     if ((c1 = (*i_getc)(f)) == EOF) {
3093                         /* don't send bogus code
3094                         (*oconv)(0, ESC);
3095                         (*oconv)(0, '('); */
3096                         LAST;
3097                     } else {
3098                         if (c1 == 'I') {
3099                             /* This is X0201 kana introduction */
3100                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3101                             NEXT;
3102                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3103                             /* ESC ( B / J / H: input_mode goes back to ASCII */
3104                             input_mode = ASCII; shift_mode = FALSE;
3105                             NEXT;
3106                         } else if (broken_f&0x2) {
3107                             input_mode = ASCII; shift_mode = FALSE;
3108                             NEXT;
3109                         } else {
3110                             (*oconv)(0, ESC);
3111                             (*oconv)(0, '(');
3112                             /* maintain various input_mode here */
3113                             SEND;
3114                         }
3115                     }
3116                } else if ( c1 == 'N' || c1 == 'n'){
3117                    /* SS2 */
3118                    c3 = (*i_getc)(f);  /* skip SS2 */
3119                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3120                        c1 = c3;
3121                        c2 = JIS_X_0201;
3122                        SEND;
3123                    }else{
3124                        (*i_ungetc)(c3, f);
3125                        /* lonely ESC  */
3126                        (*oconv)(0, ESC);
3127                        SEND;
3128                    }
3129                 } else {
3130                     /* lonely ESC  */
3131                     (*oconv)(0, ESC);
3132                     SEND;
3133                 }
3134             } else if (c1 == ESC && iconv == s_iconv) {
3135                 /* ESC in Shift_JIS */
3136                 if ((c1 = (*i_getc)(f)) == EOF) {
3137                     /*  (*oconv)(0, ESC); don't send bogus code */
3138                     LAST;
3139                 } else if (c1 == '$') {
3140                     /* J-PHONE emoji */
3141                     if ((c1 = (*i_getc)(f)) == EOF) {
3142                         /*
3143                            (*oconv)(0, ESC); don't send bogus code
3144                            (*oconv)(0, '$'); */
3145                         LAST;
3146                     } else {
3147                         if (('E' <= c1 && c1 <= 'G') ||
3148                             ('O' <= c1 && c1 <= 'Q')) {
3149                             /*
3150                                NUM : 0 1 2 3 4 5
3151                                BYTE: G E F O P Q
3152                                C%7 : 1 6 0 2 3 4
3153                                C%7 : 0 1 2 3 4 5 6
3154                                NUM : 2 0 3 4 5 X 1
3155                              */
3156                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3157                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
                             /* Every following byte in SP..'z' is emitted
                              * as c1 + c0; any other byte ends the run. */
3158                             while ((c1 = (*i_getc)(f)) != EOF) {
3159                                 if (SP <= c1 && c1 <= 'z') {
3160                                     (*oconv)(0, c1 + c0);
3161                                 } else break; /* c1 == SO */
3162                             }
3163                         }
3164                     }
3165                     if (c1 == EOF) LAST;
3166                     NEXT;
3167                 } else {
3168                     /* lonely ESC  */
3169                     (*oconv)(0, ESC);
3170                     SEND;
3171                 }
3172             } else if (c1 == LF || c1 == CR) {
3173                 if (broken_f&4) {
3174                     input_mode = ASCII; set_iconv(FALSE, 0);
3175                     SEND;
3176                 } else if (mime_decode_f && !mime_decode_mode){
                     /* A line break that is followed by SP is dropped (the
                      * SP is pushed back and kept); any other line break
                      * is sent. */
3177                     if (c1 == LF) {
3178                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3179                             i_ungetc(SP,f);
3180                             continue;
3181                         } else {
3182                             i_ungetc(c1,f);
3183                         }
3184                         c1 = LF;
3185                         SEND;
3186                     } else  { /* if (c1 == CR)*/
3187                         if ((c1=(*i_getc)(f))!=EOF) {
3188                             if (c1==SP) {
3189                                 i_ungetc(SP,f);
3190                                 continue;
3191                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3192                                 i_ungetc(SP,f);
3193                                 continue;
3194                             } else {
3195                                 i_ungetc(c1,f);
3196                             }
                             /* NOTE(review): LF is pushed back here even
                              * when the byte read after CR was not LF --
                              * confirm this is intended. */
3197                             i_ungetc(LF,f);
3198                         } else {
3199                             i_ungetc(c1,f);
3200                         }
3201                         c1 = CR;
3202                         SEND;
3203                     }
3204                 }
3205             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3206                 /* CP5022x */
3207                 c2 = c1;
3208                 NEXT;
3209             } else
3210                 SEND;
3211         }
3212         /* send: */
         /* Hand the collected character to the next stage.  In ASCII
          * input_mode it goes through iconv, whose negative return values
          * ask for more bytes; in the JIS modes it goes to oconv directly. */
3213         switch(input_mode){
3214         case ASCII:
3215             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3216             case -2:
3217                 /* 4 bytes UTF-8 */
3218                 if ((c0 = (*i_getc)(f)) != EOF) {
3219                     code_status(c0);
3220                     c0 <<= 8;
3221                     if ((c3 = (*i_getc)(f)) != EOF) {
3222                         code_status(c3);
3223                         (*iconv)(c2, c1, c0|c3);
3224                     }
3225                 }
3226                 break;
3227             case -1:
3228                 /* 3 bytes EUC or UTF-8 */
3229                 if ((c0 = (*i_getc)(f)) != EOF) {
3230                     code_status(c0);
3231                     (*iconv)(c2, c1, c0);
3232                 }
3233                 break;
3234             }
3235             break;
3236         case JIS_X_0208:
3237         case JIS_X_0213_1:
3238             if (ms_ucs_map_f &&
3239                 0x7F <= c2 && c2 <= 0x92 &&
3240                 0x21 <= c1 && c1 <= 0x7E) {
3241                 /* CP932 UDC */
                 /* NOTE(review): the test below can never be true, because
                  * the enclosing condition already requires c1 <= 0x7E. */
3242                 if(c1 == 0x7F) return 0;
3243                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3244                 c2 = 0;
3245             }
3246             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3247             break;
3248 #ifdef X0212_ENABLE
3249         case JIS_X_0212:
3250             (*oconv)(PREFIX_EUCG3 | c2, c1);
3251             break;
3252 #endif /* X0212_ENABLE */
3253         case JIS_X_0213_2:
3254             (*oconv)(PREFIX_EUCG3 | c2, c1);
3255             break;
3256         default:
3257             (*oconv)(input_mode, c1);  /* other special case */
3258         }
3259
3260         c2 = 0;
3261         c0 = 0;
3262         continue;
3263         /* goto next_word */
3264     }
3265
3266     /* epilogue */
3267     (*iconv)(EOF, 0, 0);
3268     if (!input_codename)
3269     {
3270         if (is_8bit) {
             /* No code name recorded yet: take the candidate with the
              * lowest score. */
3271             struct input_code *p = input_code_list;
3272             struct input_code *result = p;
3273             while (p->name){
3274                 if (p->score < result->score) result = p;
3275                 ++p;
3276             }
3277             set_input_codename(result->name);
3278 #ifdef CHECK_OPTION
3279             debug(result->name);
3280 #endif
3281         }
3282     }
3283     return 1;
3284 }
3285
3286 nkf_char
3287 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3288 {
3289     nkf_char ret, c3, c0;
3290     int hold_index;
3291
3292
3293     /** it must NOT be in the kanji shifte sequence      */
3294     /** it must NOT be written in JIS7                   */
3295     /** and it must be after 2 byte 8bit code            */
3296
3297     hold_count = 0;
3298     push_hold_buf(c2);
3299     push_hold_buf(c1);
3300
3301     while ((c1 = (*i_getc)(f)) != EOF) {
3302         if (c1 == ESC){
3303             (*i_ungetc)(c1,f);
3304             break;
3305         }
3306         code_status(c1);
3307         if (push_hold_buf(c1) == EOF || estab_f){
3308             break;
3309         }
3310     }
3311
3312     if (!estab_f){
3313         struct input_code *p = input_code_list;
3314         struct input_code *result = p;
3315         if (c1 == EOF){
3316             code_status(c1);
3317         }
3318         while (p->name){
3319             if (p->status_func && p->score < result->score){
3320                 result = p;
3321             }
3322             ++p;
3323         }
3324         set_iconv(TRUE, result->iconv_func);
3325     }
3326
3327
3328     /** now,
3329      ** 1) EOF is detected, or
3330      ** 2) Code is established, or
3331      ** 3) Buffer is FULL (but last word is pushed)
3332      **
3333      ** in 1) and 3) cases, we continue to use
3334      ** Kanji codes by oconv and leave estab_f unchanged.
3335      **/
3336
3337     ret = c1;
3338     hold_index = 0;
3339     while (hold_index < hold_count){
3340         c2 = hold_buf[hold_index++];
3341         if (c2 <= DEL
3342 #ifdef NUMCHAR_OPTION
3343             || is_unicode_capsule(c2)
3344 #endif
3345             ){
3346             (*iconv)(0, c2, 0);
3347             continue;
3348         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3349             (*iconv)(JIS_X_0201, c2, 0);
3350             continue;
3351         }
3352         if (hold_index < hold_count){
3353             c1 = hold_buf[hold_index++];
3354         }else{
3355             c1 = (*i_getc)(f);
3356             if (c1 == EOF){
3357                 c3 = EOF;
3358                 break;
3359             }
3360             code_status(c1);
3361         }
3362         c0 = 0;
3363         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3364         case -2:
3365             /* 4 bytes UTF-8 */
3366             if (hold_index < hold_count){
3367                 c0 = hold_buf[hold_index++];
3368             } else if ((c0 = (*i_getc)(f)) == EOF) {
3369                 ret = EOF;
3370                 break;
3371             } else {
3372                 code_status(c0);
3373                 c0 <<= 8;
3374                 if (hold_index < hold_count){
3375                     c3 = hold_buf[hold_index++];
3376                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3377                     c0 = ret = EOF;
3378                     break;
3379                 } else {
3380                     code_status(c3);
3381                     (*iconv)(c2, c1, c0|c3);
3382                 }
3383             }
3384             break;
3385         case -1:
3386             /* 3 bytes EUC or UTF-8 */
3387             if (hold_index < hold_count){
3388                 c0 = hold_buf[hold_index++];
3389             } else if ((c0 = (*i_getc)(f)) == EOF) {
3390                 ret = EOF;
3391                 break;
3392             } else {
3393                 code_status(c0);
3394             }
3395             (*iconv)(c2, c1, c0);
3396             break;
3397         }
3398         if (c0 == EOF) break;
3399     }
3400     return ret;
3401 }
3402
3403 nkf_char push_hold_buf(nkf_char c2)
3404 {
3405     if (hold_count >= HOLD_SIZE*2)
3406         return (EOF);
3407     hold_buf[hold_count++] = (unsigned char)c2;
3408     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3409 }
3410
3411 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3412 {
3413 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3414     nkf_char val;
3415 #endif
3416     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3417 #ifdef SHIFTJIS_CP932
3418     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3419         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3420         if (val){
3421             c2 = val >> 8;
3422             c1 = val & 0xff;
3423         }
3424     }
3425     if (cp932inv_f
3426         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3427         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3428         if (c){
3429             c2 = c >> 8;
3430             c1 = c & 0xff;
3431         }
3432     }
3433 #endif /* SHIFTJIS_CP932 */
3434 #ifdef X0212_ENABLE
3435     if (!x0213_f && is_ibmext_in_sjis(c2)){
3436         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3437         if (val){
3438             if (val > 0x7FFF){
3439                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3440                 c1 = val & 0xff;
3441             }else{
3442                 c2 = val >> 8;
3443                 c1 = val & 0xff;
3444             }
3445             if (p2) *p2 = c2;
3446             if (p1) *p1 = c1;
3447             return 0;
3448         }
3449     }
3450 #endif
3451     if(c2 >= 0x80){
3452         if(x0213_f && c2 >= 0xF0){
3453             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3454                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3455             }else{ /* 78<=k<=94 */
3456                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3457                 if (0x9E < c1) c2++;
3458             }
3459         }else{
3460             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3461             if (0x9E < c1) c2++;
3462         }
3463         if (c1 < 0x9F)
3464             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3465         else {
3466             c1 = c1 - 0x7E;
3467         }
3468     }
3469
3470 #ifdef X0212_ENABLE
3471     c2 = x0212_unshift(c2);
3472 #endif
3473     if (p2) *p2 = c2;
3474     if (p1) *p1 = c1;
3475     return 0;
3476 }
3477
3478 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3479 {
3480     if (c2 == JIS_X_0201) {
3481         c1 &= 0x7f;
3482     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3483         /* NOP */
3484     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3485         /* CP932 UDC */
3486         if(c1 == 0x7F) return 0;
3487         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3488         c2 = 0;
3489     } else {
3490         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3491         if (ret) return ret;
3492     }
3493     (*oconv)(c2, c1);
3494     return 0;
3495 }
3496
3497 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3498 {
3499     if (c2 == JIS_X_0201) {
3500         c1 &= 0x7f;
3501 #ifdef X0212_ENABLE
3502     }else if (c2 == 0x8f){
3503         if (c0 == 0){
3504             return -1;
3505         }
3506         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3507             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3508             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3509             c2 = 0;
3510         } else {
3511             c2 = (c2 << 8) | (c1 & 0x7f);
3512             c1 = c0 & 0x7f;
3513 #ifdef SHIFTJIS_CP932
3514             if (cp51932_f){
3515                 nkf_char s2, s1;
3516                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3517                     s2e_conv(s2, s1, &c2, &c1);
3518                     if (c2 < 0x100){
3519                         c1 &= 0x7f;
3520                         c2 &= 0x7f;
3521                     }
3522                 }
3523             }
3524 #endif /* SHIFTJIS_CP932 */
3525         }
3526 #endif /* X0212_ENABLE */
3527     } else if (c2 == SSO){
3528         c2 = JIS_X_0201;
3529         c1 &= 0x7f;
3530     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3531         /* NOP */
3532     } else {
3533         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3534             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3535             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3536             c2 = 0;
3537         } else {
3538             c1 &= 0x7f;
3539             c2 &= 0x7f;
3540 #ifdef SHIFTJIS_CP932
3541             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3542                 nkf_char s2, s1;
3543                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3544                     s2e_conv(s2, s1, &c2, &c1);
3545                     if (c2 < 0x100){
3546                         c1 &= 0x7f;
3547                         c2 &= 0x7f;
3548                     }
3549                 }
3550             }
3551 #endif /* SHIFTJIS_CP932 */
3552         }
3553     }
3554     (*oconv)(c2, c1);
3555     return 0;
3556 }
3557
3558 #ifdef UTF8_INPUT_ENABLE
3559 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3560 {
3561     nkf_char ret = 0;
3562
3563     if (!c1){
3564         *p2 = 0;
3565         *p1 = c2;
3566     }else if (0xc0 <= c2 && c2 <= 0xef) {
3567         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3568 #ifdef NUMCHAR_OPTION
3569         if (ret > 0){
3570             if (p2) *p2 = 0;
3571             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3572             ret = 0;
3573         }
3574 #endif
3575     }
3576     return ret;
3577 }
3578
3579 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3580 {
3581     nkf_char ret = 0;
3582     static const char w_iconv_utf8_1st_byte[] =
3583     { /* 0xC0 - 0xFF */
3584         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3585         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3586         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3587         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3588
3589     if (c2 < 0 || 0xff < c2) {
3590     }else if (c2 == 0) { /* 0 : 1 byte*/
3591         c0 = 0;
3592     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3593         return 0;
3594     } else{
3595         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3596         case 21:
3597             if (c1 < 0x80 || 0xBF < c1) return 0;
3598             break;
3599         case 30:
3600             if (c0 == 0) return -1;
3601             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3602                 return 0;
3603             break;
3604         case 31:
3605         case 33:
3606             if (c0 == 0) return -1;
3607             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3608                 return 0;
3609             break;
3610         case 32:
3611             if (c0 == 0) return -1;
3612             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3613                 return 0;
3614             break;
3615         case 40:
3616             if (c0 == 0) return -2;
3617             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3618                 return 0;
3619             break;
3620         case 41:
3621             if (c0 == 0) return -2;
3622             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3623                 return 0;
3624             break;
3625         case 42:
3626             if (c0 == 0) return -2;
3627             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3628                 return 0;
3629             break;
3630         default:
3631             return 0;
3632             break;
3633         }
3634     }
3635     if (c2 == 0 || c2 == EOF){
3636     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3637         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3638         c2 = 0;
3639     } else {
3640         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3641     }
3642     if (ret == 0){
3643         (*oconv)(c2, c1);
3644     }
3645     return ret;
3646 }
3647 #endif
3648
3649 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3650 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3651 {
3652     val &= VALUE_MASK;
3653     if (val < 0x80){
3654         *p2 = val;
3655         *p1 = 0;
3656         *p0 = 0;
3657     }else if (val < 0x800){
3658         *p2 = 0xc0 | (val >> 6);
3659         *p1 = 0x80 | (val & 0x3f);
3660         *p0 = 0;
3661     } else if (val <= NKF_INT32_C(0xFFFF)) {
3662         *p2 = 0xe0 | (val >> 12);
3663         *p1 = 0x80 | ((val >> 6) & 0x3f);
3664         *p0 = 0x80 | (val        & 0x3f);
3665     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3666         *p2 = 0xe0 |  (val >> 16);
3667         *p1 = 0x80 | ((val >> 12) & 0x3f);
3668         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3669     } else {
3670         *p2 = 0;
3671         *p1 = 0;
3672         *p0 = 0;
3673     }
3674 }
3675 #endif
3676
3677 #ifdef UTF8_INPUT_ENABLE
3678 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3679 {
3680     nkf_char val;
3681     if (c2 >= 0xf8) {
3682         val = -1;
3683     } else if (c2 >= 0xf0){
3684         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3685         val = (c2 & 0x0f) << 18;
3686         val |= (c1 & 0x3f) << 12;
3687         val |= (c0 & 0x3f00) >> 2;
3688         val |= (c0 & 0x3f);
3689     }else if (c2 >= 0xe0){
3690         val = (c2 & 0x0f) << 12;
3691         val |= (c1 & 0x3f) << 6;
3692         val |= (c0 & 0x3f);
3693     }else if (c2 >= 0xc0){
3694         val = (c2 & 0x1f) << 6;
3695         val |= (c1 & 0x3f);
3696     }else{
3697         val = c2;
3698     }
3699     return val;
3700 }
3701
3702 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3703 {
3704     nkf_char c2, c1, c0;
3705     nkf_char ret = 0;
3706     val &= VALUE_MASK;
3707     if (val < 0x80){
3708         *p2 = 0;
3709         *p1 = val;
3710     }else{
3711         w16w_conv(val, &c2, &c1, &c0);
3712         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3713 #ifdef NUMCHAR_OPTION
3714         if (ret > 0){
3715             *p2 = 0;
3716             *p1 = CLASS_UNICODE | val;
3717             ret = 0;
3718         }
3719 #endif
3720     }
3721     return ret;
3722 }
3723 #endif
3724
3725 #ifdef UTF8_INPUT_ENABLE
3726 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3727 {
3728     nkf_char ret = 0;
3729     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3730         (*oconv)(c2, c1);
3731         return 0;
3732     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3733         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3734             return -2;
3735         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3736         c2 = 0;
3737     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3738         /*
3739            return 2;
3740         */
3741         return 1;
3742     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3743     if (ret) return ret;
3744     (*oconv)(c2, c1);
3745     return 0;
3746 }
3747
3748 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3749 {
3750     int ret = 0;
3751
3752     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3753     } else if (is_unicode_bmp(c1)) {
3754         ret = w16e_conv(c1, &c2, &c1);
3755     } else {
3756         c2 = 0;
3757         c1 =  CLASS_UNICODE | c1;
3758     }
3759     if (ret) return ret;
3760     (*oconv)(c2, c1);
3761     return 0;
3762 }
3763
3764 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3765 {
3766     const unsigned short *const *pp;
3767     const unsigned short *const *const *ppp;
3768     static const char no_best_fit_chars_table_C2[] =
3769     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3770         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3771         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3772         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3773     static const char no_best_fit_chars_table_C2_ms[] =
3774     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3775         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3776         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3777         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3778     static const char no_best_fit_chars_table_932_C2[] =
3779     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3780         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3781         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3782         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3783     static const char no_best_fit_chars_table_932_C3[] =
3784     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3785         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3786         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3787         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3788     nkf_char ret = 0;
3789
3790     if(c2 < 0x80){
3791         *p2 = 0;
3792         *p1 = c2;
3793     }else if(c2 < 0xe0){
3794         if(no_best_fit_chars_f){
3795             if(ms_ucs_map_f == UCS_MAP_CP932){
3796                 switch(c2){
3797                 case 0xC2:
3798                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3799                     break;
3800                 case 0xC3:
3801                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3802                     break;
3803                 }
3804             }else if(!cp932inv_f){
3805                 switch(c2){
3806                 case 0xC2:
3807                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3808                     break;
3809                 case 0xC3:
3810                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3811                     break;
3812                 }
3813             }else if(ms_ucs_map_f == UCS_MAP_MS){
3814                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3815             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3816                 switch(c2){
3817                 case 0xC2:
3818                     switch(c1){
3819                     case 0xA2:
3820                     case 0xA3:
3821                     case 0xA5:
3822                     case 0xA6:
3823                     case 0xAC:
3824                     case 0xAF:
3825                     case 0xB8:
3826                         return 1;
3827                     }
3828                     break;
3829                 }
3830             }
3831         }
3832         pp =
3833             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3834             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3835             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3836             utf8_to_euc_2bytes;
3837         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3838     }else if(c0 < 0xF0){
3839         if(no_best_fit_chars_f){
3840             if(ms_ucs_map_f == UCS_MAP_CP932){
3841                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3842             }else if(ms_ucs_map_f == UCS_MAP_MS){
3843                 switch(c2){
3844                 case 0xE2:
3845                     switch(c1){
3846                     case 0x80:
3847                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3848                         break;
3849                     case 0x88:
3850                         if(c0 == 0x92) return 1;
3851                         break;
3852                     }
3853                     break;
3854                 case 0xE3:
3855                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3856                     break;
3857                 }
3858             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3859                 switch(c2){
3860                 case 0xE3:
3861                     switch(c1){
3862                     case 0x82:
3863                             if(c0 == 0x94) return 1;
3864                         break;
3865                     case 0x83:
3866                             if(c0 == 0xBB) return 1;
3867                         break;
3868                     }
3869                     break;
3870                 }
3871             }else{
3872                 switch(c2){
3873                 case 0xE2:
3874                     switch(c1){
3875                     case 0x80:
3876                         if(c0 == 0x95) return 1;
3877                         break;
3878                     case 0x88:
3879                         if(c0 == 0xA5) return 1;
3880                         break;
3881                     }
3882                     break;
3883                 case 0xEF:
3884                     switch(c1){
3885                     case 0xBC:
3886                         if(c0 == 0x8D) return 1;
3887                         break;
3888                     case 0xBD:
3889                         if(c0 == 0x9E && !cp932inv_f) return 1;
3890                         break;
3891                     case 0xBF:
3892                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3893                         break;
3894                     }
3895                     break;
3896                 }
3897             }
3898         }
3899         ppp =
3900             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3901             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3902             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3903             utf8_to_euc_3bytes;
3904         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3905     }else return -1;
3906 #ifdef SHIFTJIS_CP932
3907     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3908         nkf_char s2, s1;
3909         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3910             s2e_conv(s2, s1, p2, p1);
3911         }else{
3912             ret = 1;
3913         }
3914     }
3915 #endif
3916     return ret;
3917 }
3918
3919 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3920 {
3921     nkf_char c2;
3922     const unsigned short *p;
3923     unsigned short val;
3924
3925     if (pp == 0) return 1;
3926
3927     c1 -= 0x80;
3928     if (c1 < 0 || psize <= c1) return 1;
3929     p = pp[c1];
3930     if (p == 0)  return 1;
3931
3932     c0 -= 0x80;
3933     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3934     val = p[c0];
3935     if (val == 0) return 1;
3936     if (no_cp932ext_f && (
3937         (val>>8) == 0x2D || /* NEC special characters */
3938         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3939         )) return 1;
3940
3941     c2 = val >> 8;
3942    if (val > 0x7FFF){
3943         c2 &= 0x7f;
3944         c2 |= PREFIX_EUCG3;
3945     }
3946     if (c2 == SO) c2 = JIS_X_0201;
3947     c1 = val & 0x7f;
3948     if (p2) *p2 = c2;
3949     if (p1) *p1 = c1;
3950     return 0;
3951 }
3952
3953 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3954 {
3955     int shift = 20;
3956     c &= VALUE_MASK;
3957     while(shift >= 0){
3958         if(c >= 1<<shift){
3959             while(shift >= 0){
3960                 (*f)(0, bin2hex(c>>shift));
3961                 shift -= 4;
3962             }
3963         }else{
3964             shift -= 4;
3965         }
3966     }
3967     return;
3968 }
3969
3970 void encode_fallback_html(nkf_char c)
3971 {
3972     (*oconv)(0, '&');
3973     (*oconv)(0, '#');
3974     c &= VALUE_MASK;
3975     if(c >= NKF_INT32_C(1000000))
3976         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3977     if(c >= NKF_INT32_C(100000))
3978         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3979     if(c >= 10000)
3980         (*oconv)(0, 0x30+(c/10000  )%10);
3981     if(c >= 1000)
3982         (*oconv)(0, 0x30+(c/1000   )%10);
3983     if(c >= 100)
3984         (*oconv)(0, 0x30+(c/100    )%10);
3985     if(c >= 10)
3986         (*oconv)(0, 0x30+(c/10     )%10);
3987     if(c >= 0)
3988         (*oconv)(0, 0x30+ c         %10);
3989     (*oconv)(0, ';');
3990     return;
3991 }
3992
3993 void encode_fallback_xml(nkf_char c)
3994 {
3995     (*oconv)(0, '&');
3996     (*oconv)(0, '#');
3997     (*oconv)(0, 'x');
3998     nkf_each_char_to_hex(oconv, c);
3999     (*oconv)(0, ';');
4000     return;
4001 }
4002
4003 void encode_fallback_java(nkf_char c)
4004 {
4005     (*oconv)(0, '\\');
4006     c &= VALUE_MASK;
4007     if(!is_unicode_bmp(c)){
4008         (*oconv)(0, 'U');
4009         (*oconv)(0, '0');
4010         (*oconv)(0, '0');
4011         (*oconv)(0, bin2hex(c>>20));
4012         (*oconv)(0, bin2hex(c>>16));
4013     }else{
4014         (*oconv)(0, 'u');
4015     }
4016     (*oconv)(0, bin2hex(c>>12));
4017     (*oconv)(0, bin2hex(c>> 8));
4018     (*oconv)(0, bin2hex(c>> 4));
4019     (*oconv)(0, bin2hex(c    ));
4020     return;
4021 }
4022
4023 void encode_fallback_perl(nkf_char c)
4024 {
4025     (*oconv)(0, '\\');
4026     (*oconv)(0, 'x');
4027     (*oconv)(0, '{');
4028     nkf_each_char_to_hex(oconv, c);
4029     (*oconv)(0, '}');
4030     return;
4031 }
4032
4033 void encode_fallback_subchar(nkf_char c)
4034 {
4035     c = unicode_subchar;
4036     (*oconv)((c>>8)&0xFF, c&0xFF);
4037     return;
4038 }
4039 #endif
4040
4041 #ifdef UTF8_OUTPUT_ENABLE
4042 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
4043 {
4044     const unsigned short *p;
4045
4046     if (c2 == JIS_X_0201) {
4047         if (ms_ucs_map_f == UCS_MAP_CP10001) {
4048             switch (c1) {
4049             case 0x20:
4050                 return 0xA0;
4051             case 0x7D:
4052                 return 0xA9;
4053             }
4054         }
4055         p = euc_to_utf8_1byte;
4056 #ifdef X0212_ENABLE
4057     } else if (is_eucg3(c2)){
4058         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4059             return 0xA6;
4060         }
4061         c2 = (c2&0x7f) - 0x21;
4062         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4063             p = x0212_to_utf8_2bytes[c2];
4064         else
4065             return 0;
4066 #endif
4067     } else {
4068         c2 &= 0x7f;
4069         c2 = (c2&0x7f) - 0x21;
4070         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4071             p =
4072                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4073                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4074                 euc_to_utf8_2bytes_ms[c2];
4075         else
4076             return 0;
4077     }
4078     if (!p) return 0;
4079     c1 = (c1 & 0x7f) - 0x21;
4080     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4081         return p[c1];
4082     return 0;
4083 }
4084
4085 void w_oconv(nkf_char c2, nkf_char c1)
4086 {
4087     nkf_char c0;
4088     nkf_char val;
4089
4090     if (output_bom_f) {
4091         output_bom_f = FALSE;
4092         (*o_putc)('\357');
4093         (*o_putc)('\273');
4094         (*o_putc)('\277');
4095     }
4096
4097     if (c2 == EOF) {
4098         (*o_putc)(EOF);
4099         return;
4100     }
4101
4102 #ifdef NUMCHAR_OPTION
4103     if (c2 == 0 && is_unicode_capsule(c1)){
4104         val = c1 & VALUE_MASK;
4105         if (val < 0x80){
4106             (*o_putc)(val);
4107         }else if (val < 0x800){
4108             (*o_putc)(0xC0 | (val >> 6));
4109             (*o_putc)(0x80 | (val & 0x3f));
4110         } else if (val <= NKF_INT32_C(0xFFFF)) {
4111             (*o_putc)(0xE0 | (val >> 12));
4112             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4113             (*o_putc)(0x80 | (val        & 0x3f));
4114         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4115             (*o_putc)(0xF0 | ( val>>18));
4116             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4117             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4118             (*o_putc)(0x80 | ( val      & 0x3f));
4119         }
4120         return;
4121     }
4122 #endif
4123
4124     if (c2 == 0) {
4125         output_mode = ASCII;
4126         (*o_putc)(c1);
4127     } else if (c2 == ISO_8859_1) {
4128         output_mode = UTF_8;
4129         (*o_putc)(c1 | 0x080);
4130     } else {
4131         output_mode = UTF_8;
4132         val = e2w_conv(c2, c1);
4133         if (val){
4134             w16w_conv(val, &c2, &c1, &c0);
4135             (*o_putc)(c2);
4136             if (c1){
4137                 (*o_putc)(c1);
4138                 if (c0) (*o_putc)(c0);
4139             }
4140         }
4141     }
4142 }
4143
4144 void w_oconv16(nkf_char c2, nkf_char c1)
4145 {
4146     if (output_bom_f) {
4147         output_bom_f = FALSE;
4148         if (output_endian == ENDIAN_LITTLE){
4149             (*o_putc)((unsigned char)'\377');
4150             (*o_putc)('\376');
4151         }else{
4152             (*o_putc)('\376');
4153             (*o_putc)((unsigned char)'\377');
4154         }
4155     }
4156
4157     if (c2 == EOF) {
4158         (*o_putc)(EOF);
4159         return;
4160     }
4161
4162     if (c2 == ISO_8859_1) {
4163         c2 = 0;
4164         c1 |= 0x80;
4165 #ifdef NUMCHAR_OPTION
4166     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4167         if (is_unicode_bmp(c1)) {
4168             c2 = (c1 >> 8) & 0xff;
4169             c1 &= 0xff;
4170         } else {
4171             c1 &= VALUE_MASK;
4172             if (c1 <= UNICODE_MAX) {
4173                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4174                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4175                 if (output_endian == ENDIAN_LITTLE){
4176                     (*o_putc)(c2 & 0xff);
4177                     (*o_putc)((c2 >> 8) & 0xff);
4178                     (*o_putc)(c1 & 0xff);
4179                     (*o_putc)((c1 >> 8) & 0xff);
4180                 }else{
4181                     (*o_putc)((c2 >> 8) & 0xff);
4182                     (*o_putc)(c2 & 0xff);
4183                     (*o_putc)((c1 >> 8) & 0xff);
4184                     (*o_putc)(c1 & 0xff);
4185                 }
4186             }
4187             return;
4188         }
4189 #endif
4190     } else if (c2) {
4191         nkf_char val = e2w_conv(c2, c1);
4192         c2 = (val >> 8) & 0xff;
4193         c1 = val & 0xff;
4194         if (!val) return;
4195     }
4196     if (output_endian == ENDIAN_LITTLE){
4197         (*o_putc)(c1);
4198         (*o_putc)(c2);
4199     }else{
4200         (*o_putc)(c2);
4201         (*o_putc)(c1);
4202     }
4203 }
4204
4205 void w_oconv32(nkf_char c2, nkf_char c1)
4206 {
4207     if (output_bom_f) {