OSDN Git Service

* refactoring encodings.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SorceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.157 2007/12/22 08:07:23 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-22"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
58 #if DEFAULT_NEWLINE == 0x0D0A
59 #define PUT_NEWLINE(func) do {\
60     func(0x0D);\
61     func(0x0A);\
62 } while (0)
63 #define OCONV_NEWLINE(func) do {\
64     func(0, 0x0D);\
65     func(0, 0x0A);\
66 } while (0)
67 #elif DEFAULT_NEWLINE == 0x0D
68 #define PUT_NEWLINE(func) func(0x0D)
69 #define OCONV_NEWLINE(func) func(0, 0x0D)
70 #else
71 #define DEFAULT_NEWLINE 0x0A
72 #define PUT_NEWLINE(func) func(0x0A)
73 #define OCONV_NEWLINE(func) func(0, 0x0A)
74 #endif
75
76 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
77 #define MSDOS
78 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
79 #define __WIN32__
80 #endif
81 #endif
82
83 #ifdef PERL_XS
84 #undef OVERWRITE
85 #endif
86
87 #ifndef PERL_XS
88 #include <stdio.h>
89 #endif
90
91 #include <stdlib.h>
92 #include <string.h>
93
94 #if defined(MSDOS) || defined(__OS2__)
95 #include <fcntl.h>
96 #include <io.h>
97 #if defined(_MSC_VER) || defined(__WATCOMC__)
98 #define mktemp _mktemp
99 #endif
100 #endif
101
102 #ifdef MSDOS
103 #ifdef LSI_C
104 #define setbinmode(fp) fsetbin(fp)
105 #elif defined(__DJGPP__)
106 #include <libc/dosio.h>
107 #define setbinmode(fp) djgpp_setbinmode(fp)
108 #else /* Microsoft C, Turbo C */
109 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
110 #endif
111 #else /* UNIX */
112 #define setbinmode(fp)
113 #endif
114
115 #if defined(__DJGPP__)
116 void  djgpp_setbinmode(FILE *fp)
117 {
118     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
119     int fd, m;
120     fd = fileno(fp);
121     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
122     __file_handle_set(fd, m);
123 }
124 #endif
125
126 #ifdef _IOFBF /* SysV and MSDOS, Windows */
127 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
128 #else /* BSD */
129 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
130 #endif
131
132 /*Borland C++ 4.5 EasyWin*/
133 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
134 #define         EASYWIN
135 #ifndef __WIN16__
136 #define __WIN16__
137 #endif
138 #include <windows.h>
139 #endif
140
141 #ifdef OVERWRITE
142 /* added by satoru@isoternet.org */
143 #if defined(__EMX__)
144 #include <sys/types.h>
145 #endif
146 #include <sys/stat.h>
147 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
148 #include <unistd.h>
149 #if defined(__WATCOMC__)
150 #include <sys/utime.h>
151 #else
152 #include <utime.h>
153 #endif
154 #else /* defined(MSDOS) */
155 #ifdef __WIN32__
156 #ifdef __BORLANDC__ /* BCC32 */
157 #include <utime.h>
158 #else /* !defined(__BORLANDC__) */
159 #include <sys/utime.h>
160 #endif /* (__BORLANDC__) */
161 #else /* !defined(__WIN32__) */
162 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
163 #include <sys/utime.h>
164 #elif defined(__TURBOC__) /* BCC */
165 #include <utime.h>
166 #elif defined(LSI_C) /* LSI C */
167 #endif /* (__WIN32__) */
168 #endif
169 #endif
170 #endif
171
172 #define         FALSE   0
173 #define         TRUE    1
174
175 /* state of output_mode and input_mode
176
177    c2           0 means ASCII
178                 JIS_X_0201
179                 ISO_8859_1
180                 JIS_X_0208
181                 EOF      all termination
182    c1           32bit data
183
184  */
185
186 /* Input Assumption */
187
188 #define         JIS_INPUT       4
189 #define         EUC_INPUT      16
190 #define         SJIS_INPUT      5
191 #define         LATIN1_INPUT    6
192 #define         UTF8_INPUT     13
193 #define         UTF16_INPUT    1015
194 #define         UTF32_INPUT    1017
195
196 #define         FIXED_MIME      7
197 #define         STRICT_MIME     8
198
199 /* MIME ENCODE */
200
201
202 /* byte order */
203
204 #define         ENDIAN_BIG      1234
205 #define         ENDIAN_LITTLE   4321
206 #define         ENDIAN_2143     2143
207 #define         ENDIAN_3412     3412
208
209 /* ASCII CODE */
210
211 #define         BS      0x08
212 #define         TAB     0x09
213 #define         LF      0x0a
214 #define         CR      0x0d
215 #define         ESC     0x1b
216 #define         SP      0x20
217 #define         AT      0x40
218 #define         SSP     0xa0
219 #define         DEL     0x7f
220 #define         SI      0x0f
221 #define         SO      0x0e
222 #define         SSO     0x8e
223 #define         SS3     0x8f
224 #define         CRLF    0x0D0A
225
226
227 /* encodings */
228
/*
 * Encoding identifiers.
 *
 * The first group is numbered sequentially from 0 and supplies the ids used
 * in nkf_encoding_table.  The second group is numbered explicitly from
 * 0x1000, so its values never collide with the sequential ids.
 */
enum nkf_encodings {
    /* single-byte */
    ASCII = 0,
    ISO_8859_1,

    /* ISO-2022-JP family */
    ISO_2022_JP,
    CP50220,
    CP50221,
    CP50222,
    ISO_2022_JP_1,
    ISO_2022_JP_3,

    /* Shift_JIS family */
    SHIFT_JIS,
    WINDOWS_31J,
    CP10001,

    /* EUC-JP family */
    EUC_JP,
    CP51932,
    EUCJP_MS,
    EUCJP_ASCII,

    /* JIS X 0213 based variants */
    SHIFT_JISX0213,
    SHIFT_JIS_2004,
    EUC_JISX0213,
    EUC_JIS_2004,

    /* UTF-8 */
    UTF_8,
    UTF_8N,
    UTF_8_BOM,
    UTF8_MAC,

    /* UTF-16 */
    UTF_16,
    UTF_16BE,
    UTF_16BE_BOM,
    UTF_16LE,
    UTF_16LE_BOM,

    /* UTF-32 */
    UTF_32,
    UTF_32BE,
    UTF_32BE_BOM,
    UTF_32LE,
    UTF_32LE_BOM,

    /* explicitly numbered from 0x1000 */
    JIS_X_0201 = 0x1000,
    JIS_X_0208,
    JIS_X_0212,
    JIS_X_0213_1,
    JIS_X_0213_2,
    BINARY
};
270
271 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
272 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
273 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
274 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
275 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
276 void j_oconv(nkf_char c2, nkf_char c1);
277 void s_oconv(nkf_char c2, nkf_char c1);
278 void e_oconv(nkf_char c2, nkf_char c1);
279 void w_oconv(nkf_char c2, nkf_char c1);
280 void w_oconv16(nkf_char c2, nkf_char c1);
281 void w_oconv32(nkf_char c2, nkf_char c1);
282
283 typedef struct {
284     char *name;
285     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
286     void (*oconv_func)(nkf_char c2, nkf_char c1);
287 } nkf_native_encoding;
288
289 nkf_native_encoding NkfEncodingASCII =          { "US_ASCII", e_iconv, e_oconv };
290 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
291 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
292 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
293 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
294 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
295 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
296
297 typedef struct {
298     int id;
299     char *name;
300     nkf_native_encoding *based_encoding;
301 } nkf_encoding;
302 nkf_encoding nkf_encoding_table[] = {
303     {ASCII,             "ASCII",                &NkfEncodingASCII},
304     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
305     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingASCII},
306     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
307     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
308     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
309     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
310     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
311     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
312     {WINDOWS_31J,       "WINDOWS-31J",          &NkfEncodingShift_JIS},
313     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
314     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
315     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
316     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
317     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
318     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
319     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
320     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
321     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
322     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
323     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
324     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
325     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
326     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
327     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
328     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
329     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
330     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
331     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
332     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
333     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
334     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
335     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
336     {BINARY,            "BINARY",               &NkfEncodingASCII},
337     {-1,                NULL,                   NULL}
338 };
339 #define NKF_ENCODING_TABLE_SIZE 34
340 struct {
341     const char *name;
342     const int id;
343 } encoding_name_to_id_table[] = {
344     {"ASCII",                   ASCII},
345     {"ISO-2022-JP",             ISO_2022_JP},
346     {"X-ISO2022JP-CP932",       CP50220},
347     {"CP50220",                 CP50220},
348     {"CP50221",                 CP50221},
349     {"CP50222",                 CP50222},
350     {"ISO-2022-JP-1",           ISO_2022_JP_1},
351     {"ISO-2022-JP-3",           ISO_2022_JP_3},
352     {"SHIFT_JIS",               SHIFT_JIS},
353     {"SJIS",                    SHIFT_JIS},
354     {"WINDOWS-31J",             WINDOWS_31J},
355     {"CSWINDOWS31J",            WINDOWS_31J},
356     {"CP932",                   WINDOWS_31J},
357     {"MS932",                   WINDOWS_31J},
358     {"CP10001",                 CP10001},
359     {"EUCJP",                   EUC_JP},
360     {"EUC-JP",                  EUC_JP},
361     {"CP51932",                 CP51932},
362     {"EUC-JP-MS",               EUCJP_MS},
363     {"EUCJP-MS",                EUCJP_MS},
364     {"EUCJPMS",                 EUCJP_MS},
365     {"EUC-JP-ASCII",            EUCJP_ASCII},
366     {"EUCJP-ASCII",             EUCJP_ASCII},
367     {"SHIFT_JISX0213",          SHIFT_JISX0213},
368     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
369     {"EUC-JISX0213",            EUC_JISX0213},
370     {"EUC-JIS-2004",            EUC_JIS_2004},
371     {"UTF-8",                   UTF_8},
372     {"UTF-8N",                  UTF_8N},
373     {"UTF-8-BOM",               UTF_8_BOM},
374     {"UTF8-MAC",                UTF8_MAC},
375     {"UTF-8-MAC",               UTF8_MAC},
376     {"UTF-16",                  UTF_16},
377     {"UTF-16BE",                UTF_16BE},
378     {"UTF-16BE-BOM",            UTF_16BE_BOM},
379     {"UTF-16LE",                UTF_16LE},
380     {"UTF-16LE-BOM",            UTF_16LE_BOM},
381     {"UTF-32",                  UTF_32},
382     {"UTF-32BE",                UTF_32BE},
383     {"UTF-32BE-BOM",            UTF_32BE_BOM},
384     {"UTF-32LE",                UTF_32LE},
385     {"UTF-32LE-BOM",            UTF_32LE_BOM},
386     {"BINARY",                  BINARY},
387     {NULL,                      -1}
388 };
389 #if defined(DEFAULT_CODE_JIS)
390 #define     DEFAULT_ENCODING ISO_2022_JP
391 #elif defined(DEFAULT_CODE_SJIS)
392 #define     DEFAULT_ENCODING SHIFT_JIS
393 #elif defined(DEFAULT_CODE_EUC)
394 #define     DEFAULT_ENCODING EUC_JP
395 #elif defined(DEFAULT_CODE_UTF8)
396 #define     DEFAULT_ENCODING UTF_8
397 #endif
398
399
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every parameter is parenthesized in the expansion, so an argument such as
 * `ch & 0xff` or `a | b` is treated as a single value.  The macros still
 * evaluate their argument more than once: do not pass expressions with side
 * effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; yields 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the high byte of the low 16 bits of c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and for characters strictly between SP and DEL other
   than ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
422
/* Lead-byte bounds used by the CP932 conversion tables. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END (inclusive).
   c2 is evaluated twice; the parameter is parenthesized so that compound
   arguments such as `x & 0xFF` are handled correctly. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
428
429 #define         HOLD_SIZE       1024
430 #if defined(INT_IS_SHORT)
431 #define         IOBUF_SIZE      2048
432 #else
433 #define         IOBUF_SIZE      16384
434 #endif
435
436 #define         DEFAULT_J       'B'
437 #define         DEFAULT_R       'B'
438
439 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
440 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
441
442 #define         RANGE_NUM_MAX   18
443 #define         GETA1   0x22
444 #define         GETA2   0x2e
445
446
447 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
448 #define sizeof_euc_to_utf8_1byte 94
449 #define sizeof_euc_to_utf8_2bytes 94
450 #define sizeof_utf8_to_euc_C2 64
451 #define sizeof_utf8_to_euc_E5B8 64
452 #define sizeof_utf8_to_euc_2bytes 112
453 #define sizeof_utf8_to_euc_3bytes 16
454 #endif
455
456 /* MIME preprocessor */
457
458 #ifdef EASYWIN /*Easy Win */
459 extern POINT _BufferSize;
460 #endif
461
462 struct input_code{
463     char *name;
464     nkf_char stat;
465     nkf_char score;
466     nkf_char index;
467     nkf_char buf[3];
468     void (*status_func)(struct input_code *, nkf_char);
469     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
470     int _file_stat;
471 };
472
473 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
474 static nkf_encoding *output_encoding;\r
475
476 #if !defined(PERL_XS) && !defined(WIN32DLL)
477 static  nkf_char     noconvert(FILE *f);
478 #endif
479 static  void    module_connection(void);
480 static  nkf_char     kanji_convert(FILE *f);
481 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
482 static  nkf_char     push_hold_buf(nkf_char c2);
483 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
484 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
485 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
486 /* UCS Mapping
487  * 0: Shift_JIS, eucJP-ascii
488  * 1: eucJP-ms
489  * 2: CP932, CP51932
490  * 3: CP10001
491  */
492 #define UCS_MAP_ASCII   0
493 #define UCS_MAP_MS      1
494 #define UCS_MAP_CP932   2
495 #define UCS_MAP_CP10001 3
496 static int ms_ucs_map_f = UCS_MAP_ASCII;
497 #endif
498 #ifdef UTF8_INPUT_ENABLE
499 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
500 static  int     no_cp932ext_f = FALSE;
501 /* ignore ZERO WIDTH NO-BREAK SPACE */
502 static  int     no_best_fit_chars_f = FALSE;
503 static  int     input_endian = ENDIAN_BIG;
504 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
505 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
506 static  void    encode_fallback_html(nkf_char c);
507 static  void    encode_fallback_xml(nkf_char c);
508 static  void    encode_fallback_java(nkf_char c);
509 static  void    encode_fallback_perl(nkf_char c);
510 static  void    encode_fallback_subchar(nkf_char c);
511 static  void    (*encode_fallback)(nkf_char c) = NULL;
512 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
513 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
514 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
515 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
516 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
517 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
518 static  void    w_status(struct input_code *, nkf_char);
519 #endif
520 #ifdef UTF8_OUTPUT_ENABLE
521 static  int     output_bom_f = FALSE;
522 static  int     output_endian = ENDIAN_BIG;
523 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
524 #endif
525 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
526 static  void    fold_conv(nkf_char c2,nkf_char c1);
527 static  void    nl_conv(nkf_char c2,nkf_char c1);
528 static  void    z_conv(nkf_char c2,nkf_char c1);
529 static  void    rot_conv(nkf_char c2,nkf_char c1);
530 static  void    hira_conv(nkf_char c2,nkf_char c1);
531 static  void    base64_conv(nkf_char c2,nkf_char c1);
532 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
533 static  void    no_connection(nkf_char c2,nkf_char c1);
534 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
535
536 static  void    code_score(struct input_code *ptr);
537 static  void    code_status(nkf_char c);
538
539 static  void    std_putc(nkf_char c);
540 static  nkf_char     std_getc(FILE *f);
541 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
542
543 static  nkf_char     broken_getc(FILE *f);
544 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
545
546 static  nkf_char     mime_begin(FILE *f);
547 static  nkf_char     mime_getc(FILE *f);
548 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
549
550 static  void    switch_mime_getc(void);
551 static  void    unswitch_mime_getc(void);
552 static  nkf_char     mime_begin_strict(FILE *f);
553 static  nkf_char     mime_getc_buf(FILE *f);
554 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
555 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
556
557 static  nkf_char     base64decode(nkf_char c);
558 static  void    mime_prechar(nkf_char c2, nkf_char c1);
559 static  void    mime_putc(nkf_char c);
560 static  void    open_mime(nkf_char c);
561 static  void    close_mime(void);
562 static  void    eof_mime(void);
563 static  void    mimeout_addchar(nkf_char c);
564 #ifndef PERL_XS
565 static  void    usage(void);
566 static  void    version(void);
567 static  void    show_configuration(void);
568 #endif
569 static  void    options(unsigned char *c);
570 static  void    reinit(void);
571
572 /* buffers */
573
574 #if !defined(PERL_XS) && !defined(WIN32DLL)
575 static unsigned char   stdibuf[IOBUF_SIZE];
576 static unsigned char   stdobuf[IOBUF_SIZE];
577 #endif
578 static unsigned char   hold_buf[HOLD_SIZE*2];
579 static int             hold_count = 0;
580
581 /* MIME preprocessor fifo */
582
583 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
584 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
585 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
586 static unsigned char           mime_buf[MIME_BUF_SIZE];
587 static unsigned int            mime_top = 0;
588 static unsigned int            mime_last = 0;  /* decoded */
589 static unsigned int            mime_input = 0; /* undecoded */
590 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
591
592 /* flags */
593 static int             unbuf_f = FALSE;
594 static int             estab_f = FALSE;
595 static int             nop_f = FALSE;
596 static int             binmode_f = TRUE;       /* binary mode */
597 static int             rot_f = FALSE;          /* rot14/43 mode */
598 static int             hira_f = FALSE;          /* hira/kata henkan */
599 static int             input_f = FALSE;        /* non fixed input code  */
600 static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
601 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
602 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
603 static int             mimebuf_f = FALSE;      /* MIME buffered input */
604 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
605 static int             iso8859_f = FALSE;      /* ISO8859 through */
606 static int             mimeout_f = FALSE;       /* base64 mode */
607 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
608 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
609
610 #ifdef UNICODE_NORMALIZATION
611 static int nfc_f = FALSE;
612 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
613 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
614 static nkf_char nfc_getc(FILE *f);
615 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
616 #endif
617
618 #ifdef INPUT_OPTION
619 static int cap_f = FALSE;
620 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
621 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
622 static nkf_char cap_getc(FILE *f);
623 static nkf_char cap_ungetc(nkf_char c,FILE *f);
624
625 static int url_f = FALSE;
626 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
627 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
628 static nkf_char url_getc(FILE *f);
629 static nkf_char url_ungetc(nkf_char c,FILE *f);
630 #endif
631
/* Spell a 32-bit constant: needs the L suffix when int is only 16 bits. */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/* The top byte of a value is its class tag; the low 24 bits are its value. */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* True when the class tag of c is CLASS_UNICODE.  The parameter is
   parenthesized so that arguments like `a | b` are masked as a whole. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the 24-bit value part of c does not exceed 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
644
645 #ifdef NUMCHAR_OPTION
646 static int numchar_f = FALSE;
647 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
648 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
649 static nkf_char numchar_getc(FILE *f);
650 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
651 #endif
652
653 #ifdef CHECK_OPTION
654 static int noout_f = FALSE;
655 static void no_putc(nkf_char c);
656 static int debug_f = FALSE;
657 static void debug(const char *str);
658 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
659 #endif
660
661 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
662 #if !defined PERL_XS
663 static  void    print_guessed_code(char *filename);
664 #endif
665 static  void    set_input_codename(char *codename);
666
667 #ifdef EXEC_IO
668 static int exec_f = 0;
669 #endif
670
671 #ifdef SHIFTJIS_CP932
672 /* invert IBM extended characters to others */
673 static int cp51932_f = FALSE;
674
675 /* invert NEC-selected IBM extended characters to IBM extended characters */
676 static int cp932inv_f = TRUE;
677
678 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
679 #endif /* SHIFTJIS_CP932 */
680
681 #ifdef X0212_ENABLE
682 static int x0212_f = FALSE;
683 static nkf_char x0212_shift(nkf_char c);
684 static nkf_char x0212_unshift(nkf_char c);
685 #endif
686 static int x0213_f = FALSE;
687
688 static unsigned char prefix_table[256];
689
690 static void set_code_score(struct input_code *ptr, nkf_char score);
691 static void clr_code_score(struct input_code *ptr, nkf_char score);
692 static void status_disable(struct input_code *ptr);
693 static void status_push_ch(struct input_code *ptr, nkf_char c);
694 static void status_clear(struct input_code *ptr);
695 static void status_reset(struct input_code *ptr);
696 static void status_reinit(struct input_code *ptr);
697 static void status_check(struct input_code *ptr, nkf_char c);
698 static void e_status(struct input_code *, nkf_char);
699 static void s_status(struct input_code *, nkf_char);
700
701 struct input_code input_code_list[] = {
702     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
703     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
704 #ifdef UTF8_INPUT_ENABLE
705     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
706     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
707     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
708 #endif
709     {0}
710 };
711
712 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
713 static int              base64_count = 0;
714
715 /* X0208 -> ASCII converter */
716
717 /* fold parameter */
718 static int             f_line = 0;    /* chars in line */
719 static int             f_prev = 0;
720 static int             fold_preserve_f = FALSE; /* preserve new lines */
721 static int             fold_f  = FALSE;
722 static int             fold_len  = 0;
723
724 /* options */
725 static unsigned char   kanji_intro = DEFAULT_J;
726 static unsigned char   ascii_intro = DEFAULT_R;
727
728 /* Folding */
729
730 #define FOLD_MARGIN  10
731 #define DEFAULT_FOLD 60
732
733 static int             fold_margin  = FOLD_MARGIN;
734
735 /* converters */
736
737 #ifdef DEFAULT_CODE_JIS
738 #   define  DEFAULT_CONV j_oconv
739 #endif
740 #ifdef DEFAULT_CODE_SJIS
741 #   define  DEFAULT_CONV s_oconv
742 #endif
743 #ifdef DEFAULT_CODE_EUC
744 #   define  DEFAULT_CONV e_oconv
745 #endif
746 #ifdef DEFAULT_CODE_UTF8
747 #   define  DEFAULT_CONV w_oconv
748 #endif
749
750 /* process default */
751 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
752
753 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
754 /* s_iconv or oconv */
755 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
756
757 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
758 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
759 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
760 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
761 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
762 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
763 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
764
765 /* static redirections */
766
767 static  void   (*o_putc)(nkf_char c) = std_putc;
768
769 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
770 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
771
772 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
773 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
774
775 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
776
777 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
778 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
779
780 /* for strict mime */
781 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
782 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
783
784 /* Global states */
785 static int output_mode = ASCII,    /* output kanji mode */
786            input_mode =  ASCII,    /* input kanji mode */
787            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
788 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
789
790 /* X0201 / X0208 conversion tables */
791
/* X0201 kana conversion table */
/* 90-9F A0-DF */
/* Stored as two-byte pairs, four pairs per row; the last pair is 0x00,0x00. */
static const unsigned char cv[] = {
    0x21,0x21,  0x21,0x23,  0x21,0x56,  0x21,0x57,    /* pairs  0- 3 */
    0x21,0x22,  0x21,0x26,  0x25,0x72,  0x25,0x21,    /* pairs  4- 7 */
    0x25,0x23,  0x25,0x25,  0x25,0x27,  0x25,0x29,    /* pairs  8-11 */
    0x25,0x63,  0x25,0x65,  0x25,0x67,  0x25,0x43,    /* pairs 12-15 */
    0x21,0x3c,  0x25,0x22,  0x25,0x24,  0x25,0x26,    /* pairs 16-19 */
    0x25,0x28,  0x25,0x2a,  0x25,0x2b,  0x25,0x2d,    /* pairs 20-23 */
    0x25,0x2f,  0x25,0x31,  0x25,0x33,  0x25,0x35,    /* pairs 24-27 */
    0x25,0x37,  0x25,0x39,  0x25,0x3b,  0x25,0x3d,    /* pairs 28-31 */
    0x25,0x3f,  0x25,0x41,  0x25,0x44,  0x25,0x46,    /* pairs 32-35 */
    0x25,0x48,  0x25,0x4a,  0x25,0x4b,  0x25,0x4c,    /* pairs 36-39 */
    0x25,0x4d,  0x25,0x4e,  0x25,0x4f,  0x25,0x52,    /* pairs 40-43 */
    0x25,0x55,  0x25,0x58,  0x25,0x5b,  0x25,0x5e,    /* pairs 44-47 */
    0x25,0x5f,  0x25,0x60,  0x25,0x61,  0x25,0x62,    /* pairs 48-51 */
    0x25,0x64,  0x25,0x66,  0x25,0x68,  0x25,0x69,    /* pairs 52-55 */
    0x25,0x6a,  0x25,0x6b,  0x25,0x6c,  0x25,0x6d,    /* pairs 56-59 */
    0x25,0x6f,  0x25,0x73,  0x21,0x2b,  0x21,0x2c,    /* pairs 60-63 */
    0x00,0x00                                         /* pair  64    */
};
812
813
/* X0201 kana conversion table for dakuten (voiced sound mark) */
/* 90-9F A0-DF */
/*
 * Same two-byte layout and length as cv.  A 0x00,0x00 entry marks a
 * position for which this table provides no code.
 * NOTE(review): presumably a non-zero pair is the X0208 code of the kana
 * combined with a following dakuten -- confirm against the lookup code.
 */
static const unsigned char dv[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
    0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
    0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
    0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
    0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
    0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
    0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
834
/* X0201 kana conversion table for handakuten (semi-voiced sound mark) */
/* 90-9F A0-DF */
/*
 * Same two-byte layout and length as cv and dv; only five entries are
 * non-zero.
 * NOTE(review): presumably a non-zero pair is the X0208 code of the kana
 * combined with a following handakuten -- confirm against the lookup code.
 */
static const unsigned char ev[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
    0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
855
856
/* X0208 kigou (symbol) conversion table */
/* 0x8140 - 0x819e */
/*
 * 96 single-byte entries.  The non-zero values are ASCII codes
 * (0x2c ',', 0x2e '.', 0x3a ':', ...).
 * NOTE(review): presumably 0x00 means "no ASCII equivalent" and the index
 * is the offset from the first code named above -- confirm against the
 * lookup code.
 */
static const unsigned char fv[] = {

    0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
    0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
    0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
    0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
    0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
    0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
    0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
} ;
874
875
876
/* TRUE: main() reopens stdout onto an output file instead of the terminal. */
static int             file_out_f = FALSE;
#ifdef OVERWRITE
/* TRUE: main() converts into a temporary file and renames it over the input. */
static int             overwrite_f = FALSE;
/* TRUE: main() copies the input file's access/modification times to the result. */
static int             preserve_time_f = FALSE;
/* TRUE: main() renames the original to a backup name before replacing it. */
static int             backup_f = FALSE;
/* Suffix pattern handed to get_backup_filename(); replaced by a malloc'ed
   copy of the option value for --overwrite=... / --in-place=... */
static char            *backup_suffix = "";
static char *get_backup_filename(const char *suffix, const char *filename);
#endif

static int nlmode_f = 0;   /* CR, LF, CRLF */
/* Reset to 0 by main() before each input file. */
static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
static nkf_char prev_cr = 0; /* CR or 0 */
#ifdef EASYWIN /*Easy Win */
/* Target of the scanf() at the end of main() when no output file is used. */
static int             end_check;
#endif /*Easy Win */

/* NOTE(review): presumably the push-back buffer and its index used by
   std_getc/std_ungetc -- confirm; their users are outside this section. */
#define STD_GC_BUFSIZE (256)
nkf_char std_gc_buf[STD_GC_BUFSIZE];
nkf_char std_gc_ndx;
896
/*
 * Duplicate a NUL-terminated string on the heap.
 *
 * Returns a malloc'ed copy of str.  If the allocation fails, the error is
 * reported with perror(str) and the string literal "" is returned instead;
 * that fallback value must be neither freed nor modified.
 */
char* nkf_strcpy(const char *str)
{
    size_t bytes = strlen(str) + 1;   /* characters plus terminator */
    char *copy = malloc(bytes);

    if (copy == NULL) {
        perror(str);
        return "";
    }
    return strcpy(copy, str);
}
907
/*
 * Copy str into res, converting each character with nkf_toupper().
 *
 * length is the total size of the res buffer in bytes.  At most
 * length - 1 characters are copied so that the terminating NUL always
 * fits inside the buffer; longer input is truncated.  Nothing is written
 * when length is 0.
 */
static void nkf_str_upcase(const char *str, char *res, size_t length)
{
    size_t i;

    if (length == 0) {
        return;
    }
    /* stop one short of the buffer size to leave room for the terminator */
    for (i = 0; i + 1 < length && str[i]; i++) {
        res[i] = nkf_toupper(str[i]);
    }
    res[i] = 0;
}
916
917 static nkf_encoding *nkf_enc_from_index(int idx)
918 {
919     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
920         return 0;
921     }
922     return &nkf_encoding_table[idx];
923 }
924
925 static int nkf_enc_find_index(const char *name)
926 {
927     int i, index = -1;
928     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
929         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
930             return encoding_name_to_id_table[i].id;
931         }
932     }
933     return index;
934 }
935
936 static nkf_encoding *nkf_enc_find(const char *name)
937 {
938     int idx = -1;
939     idx = nkf_enc_find_index(name);
940     if (idx < 0) return 0;
941     return nkf_enc_from_index(idx);
942 }
943
/*
 * Field accessors for nkf_encoding pointers.
 *
 * nkf_enc_to_index() yields -1 for a null pointer, so the result of a
 * failed nkf_enc_find() can be fed straight into a switch and reach its
 * default branch instead of dereferencing NULL.  It evaluates its argument
 * twice, so pass an expression without side effects.
 * The other two accessors require a non-null pointer.
 */
#define nkf_enc_name(enc) ((enc)->name)
#define nkf_enc_to_index(enc) ((enc) ? (enc)->id : -1)
#define nkf_enc_to_base_encoding(enc) ((enc)->based_encoding)
947
948 #ifdef WIN32DLL
949 #include "nkf32dll.c"
950 #elif defined(PERL_XS)
951 #else /* WIN32DLL */
952 int main(int argc, char **argv)
953 {
954     FILE  *fin;
955     unsigned char  *cp;
956
957     char *outfname = NULL;
958     char *origfname;
959
960 #ifdef EASYWIN /*Easy Win */
961     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
962 #endif
963
964     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
965         cp = (unsigned char *)*argv;
966         options(cp);
967         if (guess_f) {
968 #ifdef CHECK_OPTION
969             int debug_f_back = debug_f;
970 #endif
971 #ifdef EXEC_IO
972             int exec_f_back = exec_f;
973 #endif
974 #ifdef X0212_ENABLE
975             int x0212_f_back = x0212_f;
976 #endif
977             int x0213_f_back = x0213_f;
978             int guess_f_back = guess_f;
979             reinit();
980             guess_f = guess_f_back;
981             mime_f = FALSE;
982 #ifdef CHECK_OPTION
983             debug_f = debug_f_back;
984 #endif
985 #ifdef EXEC_IO
986             exec_f = exec_f_back;
987 #endif
988 #ifdef X0212_ENABLE
989             x0212_f = x0212_f_back;
990 #endif
991             x0213_f = x0213_f_back;
992         }
993 #ifdef EXEC_IO
994         if (exec_f){
995             int fds[2], pid;
996             if (pipe(fds) < 0 || (pid = fork()) < 0){
997                 abort();
998             }
999             if (pid == 0){
1000                 if (exec_f > 0){
1001                     close(fds[0]);
1002                     dup2(fds[1], 1);
1003                 }else{
1004                     close(fds[1]);
1005                     dup2(fds[0], 0);
1006                 }
1007                 execvp(argv[1], &argv[1]);
1008             }
1009             if (exec_f > 0){
1010                 close(fds[1]);
1011                 dup2(fds[0], 0);
1012             }else{
1013                 close(fds[0]);
1014                 dup2(fds[1], 1);
1015             }
1016             argc = 0;
1017             break;
1018         }
1019 #endif
1020     }
1021
1022     if (binmode_f == TRUE)
1023 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1024     if (freopen("","wb",stdout) == NULL)
1025         return (-1);
1026 #else
1027     setbinmode(stdout);
1028 #endif
1029
1030     if (unbuf_f)
1031       setbuf(stdout, (char *) NULL);
1032     else
1033       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1034
1035     if (argc == 0) {
1036       if (binmode_f == TRUE)
1037 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1038       if (freopen("","rb",stdin) == NULL) return (-1);
1039 #else
1040       setbinmode(stdin);
1041 #endif
1042       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1043       if (nop_f)
1044           noconvert(stdin);
1045       else {
1046           kanji_convert(stdin);
1047           if (guess_f) print_guessed_code(NULL);
1048       }
1049     } else {
1050       int nfiles = argc;
1051         int is_argument_error = FALSE;
1052       while (argc--) {
1053             input_codename = NULL;
1054             input_newline = 0;
1055 #ifdef CHECK_OPTION
1056             iconv_for_check = 0;
1057 #endif
1058           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1059               perror(*--argv);
1060                 *argv++;
1061                 is_argument_error = TRUE;
1062                 continue;
1063           } else {
1064 #ifdef OVERWRITE
1065               int fd = 0;
1066               int fd_backup = 0;
1067 #endif
1068
1069 /* reopen file for stdout */
1070               if (file_out_f == TRUE) {
1071 #ifdef OVERWRITE
1072                   if (overwrite_f){
1073                       outfname = malloc(strlen(origfname)
1074                                         + strlen(".nkftmpXXXXXX")
1075                                         + 1);
1076                       if (!outfname){
1077                           perror(origfname);
1078                           return -1;
1079                       }
1080                       strcpy(outfname, origfname);
1081 #ifdef MSDOS
1082                       {
1083                           int i;
1084                           for (i = strlen(outfname); i; --i){
1085                               if (outfname[i - 1] == '/'
1086                                   || outfname[i - 1] == '\\'){
1087                                   break;
1088                               }
1089                           }
1090                           outfname[i] = '\0';
1091                       }
1092                       strcat(outfname, "ntXXXXXX");
1093                       mktemp(outfname);
1094                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1095                                 S_IREAD | S_IWRITE);
1096 #else
1097                       strcat(outfname, ".nkftmpXXXXXX");
1098                       fd = mkstemp(outfname);
1099 #endif
1100                       if (fd < 0
1101                           || (fd_backup = dup(fileno(stdout))) < 0
1102                           || dup2(fd, fileno(stdout)) < 0
1103                           ){
1104                           perror(origfname);
1105                           return -1;
1106                       }
1107                   }else
1108 #endif
1109                   if(argc == 1) {
1110                       outfname = *argv++;
1111                       argc--;
1112                   } else {
1113                       outfname = "nkf.out";
1114                   }
1115
1116                   if(freopen(outfname, "w", stdout) == NULL) {
1117                       perror (outfname);
1118                       return (-1);
1119                   }
1120                   if (binmode_f == TRUE) {
1121 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1122                       if (freopen("","wb",stdout) == NULL)
1123                            return (-1);
1124 #else
1125                       setbinmode(stdout);
1126 #endif
1127                   }
1128               }
1129               if (binmode_f == TRUE)
1130 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1131                  if (freopen("","rb",fin) == NULL)
1132                     return (-1);
1133 #else
1134                  setbinmode(fin);
1135 #endif
1136               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1137               if (nop_f)
1138                   noconvert(fin);
1139               else {
1140                   char *filename = NULL;
1141                   kanji_convert(fin);
1142                   if (nfiles > 1) filename = origfname;
1143                   if (guess_f) print_guessed_code(filename);
1144               }
1145               fclose(fin);
1146 #ifdef OVERWRITE
1147               if (overwrite_f) {
1148                   struct stat     sb;
1149 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1150                   time_t tb[2];
1151 #else
1152                   struct utimbuf  tb;
1153 #endif
1154
1155                   fflush(stdout);
1156                   close(fd);
1157                   if (dup2(fd_backup, fileno(stdout)) < 0){
1158                       perror("dup2");
1159                   }
1160                   if (stat(origfname, &sb)) {
1161                       fprintf(stderr, "Can't stat %s\n", origfname);
1162                   }
1163                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1164                   if (chmod(outfname, sb.st_mode)) {
1165                       fprintf(stderr, "Can't set permission %s\n", outfname);
1166                   }
1167
1168                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1169                     if(preserve_time_f){
1170 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1171                         tb[0] = tb[1] = sb.st_mtime;
1172                         if (utime(outfname, tb)) {
1173                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1174                         }
1175 #else
1176                         tb.actime  = sb.st_atime;
1177                         tb.modtime = sb.st_mtime;
1178                         if (utime(outfname, &tb)) {
1179                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1180                         }
1181 #endif
1182                     }
1183                     if(backup_f){
1184                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1185 #ifdef MSDOS
1186                         unlink(backup_filename);
1187 #endif
1188                         if (rename(origfname, backup_filename)) {
1189                             perror(backup_filename);
1190                             fprintf(stderr, "Can't rename %s to %s\n",
1191                                     origfname, backup_filename);
1192                         }
1193                     }else{
1194 #ifdef MSDOS
1195                         if (unlink(origfname)){
1196                             perror(origfname);
1197                         }
1198 #endif
1199                     }
1200                   if (rename(outfname, origfname)) {
1201                       perror(origfname);
1202                       fprintf(stderr, "Can't rename %s to %s\n",
1203                               outfname, origfname);
1204                   }
1205                   free(outfname);
1206               }
1207 #endif
1208           }
1209       }
1210         if (is_argument_error)
1211             return(-1);
1212     }
1213 #ifdef EASYWIN /*Easy Win */
1214     if (file_out_f == FALSE)
1215         scanf("%d",&end_check);
1216     else
1217         fclose(stdout);
1218 #else /* for Other OS */
1219     if (file_out_f == TRUE)
1220         fclose(stdout);
1221 #endif /*Easy Win */
1222     return (0);
1223 }
1224 #endif /* WIN32DLL */
1225
1226 #ifdef OVERWRITE
/*
 * Build the backup file name for filename from the pattern suffix.
 *
 * If suffix contains one or more '*', every '*' is replaced by the whole
 * filename and the other characters are copied literally, e.g.
 * suffix "old_*.bak" with filename "a.txt" gives "old_a.txt.bak".
 * Otherwise suffix is appended to filename.
 *
 * Returns a malloc'ed string owned by the caller, or NULL (after a
 * perror() message) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t i, j;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* each '*' disappears and one copy of filename takes its place */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
    } else {
        backup_filename = malloc(filename_length + suffix_length + 1);
    }
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                strcpy(backup_filename + j, filename);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        strcpy(backup_filename, filename);
        strcpy(backup_filename + filename_length, suffix);
    }
    return backup_filename;
}
1265 #endif
1266
/*
 * Table of "--name" long options, searched in order by options(); the
 * first matching entry is used.
 *
 * name:  option text after the leading "--".  A name ending in '=' takes a
 *        value; options() compares only up to the '=' and treats the text
 *        after it as the value.
 * alias: when non-empty, options() parses this string as short-option
 *        letters in place of the long option.  When empty, options()
 *        handles the entry itself by comparing name.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=", ""},
    {"oc=", ""},
    {"base64","jMB"},
    {"euc","e"},
    {"euc-input","E"},
    {"fj","jm"},
    {"help","v"},
    {"jis","j"},
    {"jis-input","J"},
    {"mac","sLm"},
    {"mime","jM"},
    {"mime-input","m"},
    {"msdos","sLw"},
    {"sjis","s"},
    {"sjis-input","S"},
    {"unix","eLu"},
    {"version","V"},
    {"windows","sLw"},
    {"hiragana","h1"},
    {"katakana","h2"},
    {"katakana-hiragana","h3"},
    {"guess=", ""},
    {"guess", "g1"},
    {"cp932", ""},
    {"no-cp932", ""},
#ifdef X0212_ENABLE
    {"x0212", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8", "w"},
    {"utf16", "w16"},
    {"ms-ucs-map", ""},
    {"fb-skip", ""},
    {"fb-html", ""},
    {"fb-xml", ""},
    {"fb-perl", ""},
    {"fb-java", ""},
    {"fb-subchar", ""},
    {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input", "W"},
    {"utf16-input", "W16"},
    {"no-cp932ext", ""},
    {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
    {"overwrite", ""},
    {"overwrite=", ""},
    {"in-place", ""},
    {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input", ""},
    {"url-input", ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input", ""},
#endif
#ifdef CHECK_OPTION
    {"no-output", ""},
    {"debug", ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv", ""},
#endif
#ifdef EXEC_IO
    {"exec-in", ""},
    {"exec-out", ""},
#endif
    {"prefix=", ""},
};

/* Set to 1 by options() when it meets a bare "--"; from then on options()
   returns immediately without parsing its argument. */
static int option_mode = 0;
1348
1349 void options(unsigned char *cp)
1350 {
1351     nkf_char i, j;
1352     unsigned char *p;
1353     unsigned char *cp_back = NULL;
1354     char codeset[32];
1355     nkf_encoding *enc;
1356
1357     if (option_mode==1)
1358         return;
1359     while(*cp && *cp++!='-');
1360     while (*cp || cp_back) {
1361         if(!*cp){
1362             cp = cp_back;
1363             cp_back = NULL;
1364             continue;
1365         }
1366         p = 0;
1367         switch (*cp++) {
1368         case '-':  /* literal options */
1369             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1370                 option_mode = 1;
1371                 return;
1372             }
1373             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1374                 p = (unsigned char *)long_option[i].name;
1375                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1376                 if (*p == cp[j] || cp[j] == SP){
1377                     p = &cp[j] + 1;
1378                     break;
1379                 }
1380                 p = 0;
1381             }
1382             if (p == 0) {
1383                 fprintf(stderr, "unknown long option: --%s\n", cp);
1384                 return;
1385             }
1386             while(*cp && *cp != SP && cp++);
1387             if (long_option[i].alias[0]){
1388                 cp_back = cp;
1389                 cp = (unsigned char *)long_option[i].alias;
1390             }else{
1391                 if (strcmp(long_option[i].name, "ic=") == 0){
1392                     nkf_str_upcase(p, codeset, 32);
1393                     enc = nkf_enc_find(codeset);
1394                     switch (nkf_enc_to_index(enc)) {
1395                     case ISO_2022_JP:
1396                         input_f = JIS_INPUT;
1397                         break;
1398                     case CP50220:
1399                     case CP50221:
1400                     case CP50222:
1401                         input_f = JIS_INPUT;
1402 #ifdef SHIFTJIS_CP932
1403                         cp51932_f = TRUE;
1404 #endif
1405 #ifdef UTF8_OUTPUT_ENABLE
1406                         ms_ucs_map_f = UCS_MAP_CP932;
1407 #endif
1408                         break;
1409                     case ISO_2022_JP_1:
1410                         input_f = JIS_INPUT;
1411 #ifdef X0212_ENABLE
1412                         x0212_f = TRUE;
1413 #endif
1414                         break;
1415                     case ISO_2022_JP_3:
1416                         input_f = JIS_INPUT;
1417 #ifdef X0212_ENABLE
1418                         x0212_f = TRUE;
1419 #endif
1420                         x0213_f = TRUE;
1421                         break;
1422                     case SHIFT_JIS:
1423                         input_f = SJIS_INPUT;
1424                         break;
1425                     case WINDOWS_31J:
1426                         input_f = SJIS_INPUT;
1427 #ifdef SHIFTJIS_CP932
1428                         cp51932_f = TRUE;
1429 #endif
1430 #ifdef UTF8_OUTPUT_ENABLE
1431                         ms_ucs_map_f = UCS_MAP_CP932;
1432 #endif
1433                         break;
1434                     case CP10001:
1435                         input_f = SJIS_INPUT;
1436 #ifdef SHIFTJIS_CP932
1437                         cp51932_f = TRUE;
1438 #endif
1439 #ifdef UTF8_OUTPUT_ENABLE
1440                         ms_ucs_map_f = UCS_MAP_CP10001;
1441 #endif
1442                         break;
1443                     case EUC_JP:
1444                         input_f = EUC_INPUT;
1445                         break;
1446                     case CP51932:
1447                         input_f = EUC_INPUT;
1448 #ifdef SHIFTJIS_CP932
1449                         cp51932_f = TRUE;
1450 #endif
1451 #ifdef UTF8_OUTPUT_ENABLE
1452                         ms_ucs_map_f = UCS_MAP_CP932;
1453 #endif
1454                         break;
1455                     case EUCJP_MS:
1456                         input_f = EUC_INPUT;
1457 #ifdef SHIFTJIS_CP932
1458                         cp51932_f = FALSE;
1459 #endif
1460 #ifdef UTF8_OUTPUT_ENABLE
1461                         ms_ucs_map_f = UCS_MAP_MS;
1462 #endif
1463                         break;
1464                     case EUCJP_ASCII:
1465                         input_f = EUC_INPUT;
1466 #ifdef SHIFTJIS_CP932
1467                         cp51932_f = FALSE;
1468 #endif
1469 #ifdef UTF8_OUTPUT_ENABLE
1470                         ms_ucs_map_f = UCS_MAP_ASCII;
1471 #endif
1472                         break;
1473                     case SHIFT_JISX0213:
1474                     case SHIFT_JIS_2004:
1475                         input_f = SJIS_INPUT;
1476                         x0213_f = TRUE;
1477 #ifdef SHIFTJIS_CP932
1478                         cp51932_f = FALSE;
1479 #endif
1480                         break;
1481                     case EUC_JISX0213:
1482                     case EUC_JIS_2004:
1483                         input_f = EUC_INPUT;
1484                         x0213_f = TRUE;
1485 #ifdef SHIFTJIS_CP932
1486                         cp51932_f = FALSE;
1487 #endif
1488                         break;
1489 #ifdef UTF8_INPUT_ENABLE
1490                     case UTF_8:
1491                     case UTF_8N:
1492                     case UTF_8_BOM:
1493                         input_f = UTF8_INPUT;
1494                         break;
1495 #ifdef UNICODE_NORMALIZATION
1496                     case UTF8_MAC:
1497                         input_f = UTF8_INPUT;
1498                         nfc_f = TRUE;
1499                         break;
1500 #endif
1501                     case UTF_16:
1502                     case UTF_16BE:
1503                     case UTF_16BE_BOM:
1504                         input_f = UTF16_INPUT;
1505                         input_endian = ENDIAN_BIG;
1506                         break;
1507                     case UTF_16LE:
1508                     case UTF_16LE_BOM:
1509                         input_f = UTF16_INPUT;
1510                         input_endian = ENDIAN_LITTLE;
1511                         break;
1512                     case UTF_32:
1513                     case UTF_32BE:
1514                     case UTF_32BE_BOM:
1515                         input_f = UTF32_INPUT;
1516                         input_endian = ENDIAN_BIG;
1517                         break;
1518                     case UTF_32LE:
1519                     case UTF_32LE_BOM:
1520                         input_f = UTF32_INPUT;
1521                         input_endian = ENDIAN_LITTLE;
1522                         break;
1523 #endif
1524                     default:
1525                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1526                         break;
1527                     }
1528                     continue;
1529                 }
1530                 if (strcmp(long_option[i].name, "oc=") == 0){
1531                     x0201_f = FALSE;
1532                     nkf_str_upcase(p, codeset, 32);
1533                     output_encoding = nkf_enc_find(codeset);\r
1534                     switch (nkf_enc_to_index(output_encoding)) {\r
1535                     case ISO_2022_JP:
1536                         output_conv = j_oconv;
1537                         break;
1538                     case CP50220:
1539                             output_conv = j_oconv;
1540                             x0201_f = TRUE;
1541 #ifdef SHIFTJIS_CP932
1542                             cp932inv_f = FALSE;
1543 #endif
1544 #ifdef UTF8_OUTPUT_ENABLE
1545                             ms_ucs_map_f = UCS_MAP_CP932;
1546 #endif
1547                         break;
1548                     case CP50221:
1549                         output_conv = j_oconv;
1550 #ifdef SHIFTJIS_CP932
1551                         cp932inv_f = FALSE;
1552 #endif
1553 #ifdef UTF8_OUTPUT_ENABLE
1554                         ms_ucs_map_f = UCS_MAP_CP932;
1555 #endif
1556                         break;
1557                     case ISO_2022_JP_1:
1558                         output_conv = j_oconv;
1559 #ifdef X0212_ENABLE
1560                         x0212_f = TRUE;
1561 #endif
1562 #ifdef SHIFTJIS_CP932
1563                         cp932inv_f = FALSE;
1564 #endif
1565                         break;
1566                     case ISO_2022_JP_3:
1567                         output_conv = j_oconv;
1568 #ifdef X0212_ENABLE
1569                         x0212_f = TRUE;
1570 #endif
1571                         x0213_f = TRUE;
1572 #ifdef SHIFTJIS_CP932
1573                         cp932inv_f = FALSE;
1574 #endif
1575                         break;
1576                     case SHIFT_JIS:
1577                         output_conv = s_oconv;
1578                         break;
1579                     case WINDOWS_31J:
1580                         output_conv = s_oconv;
1581 #ifdef UTF8_OUTPUT_ENABLE
1582                         ms_ucs_map_f = UCS_MAP_CP932;
1583 #endif
1584                         break;
1585                     case CP10001:
1586                         output_conv = s_oconv;
1587 #ifdef UTF8_OUTPUT_ENABLE
1588                         ms_ucs_map_f = UCS_MAP_CP10001;
1589 #endif
1590                         break;
1591                     case EUC_JP:
1592                         output_conv = e_oconv;
1593                         break;
1594                     case CP51932:
1595                         output_conv = e_oconv;
1596 #ifdef SHIFTJIS_CP932
1597                         cp932inv_f = FALSE;
1598 #endif
1599 #ifdef UTF8_OUTPUT_ENABLE
1600                         ms_ucs_map_f = UCS_MAP_CP932;
1601 #endif
1602                         break;
1603                     case EUCJP_MS:
1604                         output_conv = e_oconv;
1605 #ifdef X0212_ENABLE
1606                         x0212_f = TRUE;
1607 #endif
1608 #ifdef UTF8_OUTPUT_ENABLE
1609                         ms_ucs_map_f = UCS_MAP_MS;
1610 #endif
1611                         break;
1612                     case EUCJP_ASCII:
1613                         output_conv = e_oconv;
1614 #ifdef X0212_ENABLE
1615                         x0212_f = TRUE;
1616 #endif
1617 #ifdef UTF8_OUTPUT_ENABLE
1618                         ms_ucs_map_f = UCS_MAP_ASCII;
1619 #endif
1620                         break;
1621                     case SHIFT_JISX0213:
1622                     case SHIFT_JIS_2004:
1623                             output_conv = s_oconv;
1624                             x0213_f = TRUE;
1625 #ifdef SHIFTJIS_CP932
1626                             cp932inv_f = FALSE;
1627 #endif
1628                         break;
1629                     case EUC_JISX0213:
1630                     case EUC_JIS_2004:
1631                         output_conv = e_oconv;
1632 #ifdef X0212_ENABLE
1633                         x0212_f = TRUE;
1634 #endif
1635                         x0213_f = TRUE;
1636 #ifdef SHIFTJIS_CP932
1637                         cp932inv_f = FALSE;
1638 #endif
1639                         break;
1640 #ifdef UTF8_OUTPUT_ENABLE
1641                     case UTF_8:
1642                     case UTF_8N:
1643                         output_conv = w_oconv;
1644                         break;
1645                     case UTF_8_BOM:
1646                         output_conv = w_oconv;
1647                         output_bom_f = TRUE;
1648                         break;
1649                     case UTF_16BE:
1650                         output_conv = w_oconv16;
1651                         break;
1652                     case UTF_16:
1653                     case UTF_16BE_BOM:
1654                         output_conv = w_oconv16;
1655                         output_bom_f = TRUE;
1656                         break;
1657                     case UTF_16LE:
1658                         output_conv = w_oconv16;
1659                         output_endian = ENDIAN_LITTLE;
1660                         break;
1661                     case UTF_16LE_BOM:
1662                         output_conv = w_oconv16;
1663                         output_endian = ENDIAN_LITTLE;
1664                         output_bom_f = TRUE;
1665                         break;
1666                     case UTF_32:
1667                     case UTF_32BE:
1668                         output_conv = w_oconv32;
1669                         break;
1670                     case UTF_32BE_BOM:
1671                         output_conv = w_oconv32;
1672                         output_bom_f = TRUE;
1673                         break;
1674                     case UTF_32LE:
1675                         output_conv = w_oconv32;
1676                         output_endian = ENDIAN_LITTLE;
1677                         break;
1678                     case UTF_32LE_BOM:
1679                         output_conv = w_oconv32;
1680                         output_endian = ENDIAN_LITTLE;
1681                         output_bom_f = TRUE;
1682                         break;
1683 #endif
1684                     default:
1685                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1686                         break;
1687                     }
1688                     continue;
1689                 }
1690                 if (strcmp(long_option[i].name, "guess=") == 0){
1691                     if (p[0] == '1') {
1692                         guess_f = 2;
1693                     } else {
1694                         guess_f = 1;
1695                     }
1696                     continue;
1697                 }
1698 #ifdef OVERWRITE
1699                 if (strcmp(long_option[i].name, "overwrite") == 0){
1700                     file_out_f = TRUE;
1701                     overwrite_f = TRUE;
1702                     preserve_time_f = TRUE;
1703                     continue;
1704                 }
1705                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1706                     file_out_f = TRUE;
1707                     overwrite_f = TRUE;
1708                     preserve_time_f = TRUE;
1709                     backup_f = TRUE;
1710                     backup_suffix = malloc(strlen((char *) p) + 1);
1711                     strcpy(backup_suffix, (char *) p);
1712                     continue;
1713                 }
1714                 if (strcmp(long_option[i].name, "in-place") == 0){
1715                     file_out_f = TRUE;
1716                     overwrite_f = TRUE;
1717                     preserve_time_f = FALSE;
1718                     continue;
1719                 }
1720                 if (strcmp(long_option[i].name, "in-place=") == 0){
1721                     file_out_f = TRUE;
1722                     overwrite_f = TRUE;
1723                     preserve_time_f = FALSE;
1724                     backup_f = TRUE;
1725                     backup_suffix = malloc(strlen((char *) p) + 1);
1726                     strcpy(backup_suffix, (char *) p);
1727                     continue;
1728                 }
1729 #endif
1730 #ifdef INPUT_OPTION
1731                 if (strcmp(long_option[i].name, "cap-input") == 0){
1732                     cap_f = TRUE;
1733                     continue;
1734                 }
1735                 if (strcmp(long_option[i].name, "url-input") == 0){
1736                     url_f = TRUE;
1737                     continue;
1738                 }
1739 #endif
1740 #ifdef NUMCHAR_OPTION
1741                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1742                     numchar_f = TRUE;
1743                     continue;
1744                 }
1745 #endif
1746 #ifdef CHECK_OPTION
1747                 if (strcmp(long_option[i].name, "no-output") == 0){
1748                     noout_f = TRUE;
1749                     continue;
1750                 }
1751                 if (strcmp(long_option[i].name, "debug") == 0){
1752                     debug_f = TRUE;
1753                     continue;
1754                 }
1755 #endif
1756                 if (strcmp(long_option[i].name, "cp932") == 0){
1757 #ifdef SHIFTJIS_CP932
1758                     cp51932_f = TRUE;
1759                     cp932inv_f = TRUE;
1760 #endif
1761 #ifdef UTF8_OUTPUT_ENABLE
1762                     ms_ucs_map_f = UCS_MAP_CP932;
1763 #endif
1764                     continue;
1765                 }
1766                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1767 #ifdef SHIFTJIS_CP932
1768                     cp51932_f = FALSE;
1769                     cp932inv_f = FALSE;
1770 #endif
1771 #ifdef UTF8_OUTPUT_ENABLE
1772                     ms_ucs_map_f = UCS_MAP_ASCII;
1773 #endif
1774                     continue;
1775                 }
1776 #ifdef SHIFTJIS_CP932
1777                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1778                     cp932inv_f = TRUE;
1779                     continue;
1780                 }
1781 #endif
1782
1783 #ifdef X0212_ENABLE
1784                 if (strcmp(long_option[i].name, "x0212") == 0){
1785                     x0212_f = TRUE;
1786                     continue;
1787                 }
1788 #endif
1789
1790 #ifdef EXEC_IO
1791                   if (strcmp(long_option[i].name, "exec-in") == 0){
1792                       exec_f = 1;
1793                       return;
1794                   }
1795                   if (strcmp(long_option[i].name, "exec-out") == 0){
1796                       exec_f = -1;
1797                       return;
1798                   }
1799 #endif
1800 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1801                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1802                     no_cp932ext_f = TRUE;
1803                     continue;
1804                 }
1805                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1806                     no_best_fit_chars_f = TRUE;
1807                     continue;
1808                 }
1809                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1810                     encode_fallback = NULL;
1811                     continue;
1812                 }
1813                 if (strcmp(long_option[i].name, "fb-html") == 0){
1814                     encode_fallback = encode_fallback_html;
1815                     continue;
1816                 }
1817                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1818                     encode_fallback = encode_fallback_xml;
1819                     continue;
1820                 }
1821                 if (strcmp(long_option[i].name, "fb-java") == 0){
1822                     encode_fallback = encode_fallback_java;
1823                     continue;
1824                 }
1825                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1826                     encode_fallback = encode_fallback_perl;
1827                     continue;
1828                 }
1829                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1830                     encode_fallback = encode_fallback_subchar;
1831                     continue;
1832                 }
1833                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1834                     encode_fallback = encode_fallback_subchar;
1835                     unicode_subchar = 0;
1836                     if (p[0] != '0'){
1837                         /* decimal number */
1838                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1839                             unicode_subchar *= 10;
1840                             unicode_subchar += hex2bin(p[i]);
1841                         }
1842                     }else if(p[1] == 'x' || p[1] == 'X'){
1843                         /* hexadecimal number */
1844                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1845                             unicode_subchar <<= 4;
1846                             unicode_subchar |= hex2bin(p[i]);
1847                         }
1848                     }else{
1849                         /* octal number */
1850                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1851                             unicode_subchar *= 8;
1852                             unicode_subchar += hex2bin(p[i]);
1853                         }
1854                     }
1855                     w16e_conv(unicode_subchar, &i, &j);
1856                     unicode_subchar = i<<8 | j;
1857                     continue;
1858                 }
1859 #endif
1860 #ifdef UTF8_OUTPUT_ENABLE
1861                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1862                     ms_ucs_map_f = UCS_MAP_MS;
1863                     continue;
1864                 }
1865 #endif
1866 #ifdef UNICODE_NORMALIZATION
1867                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1868                     input_f = UTF8_INPUT;
1869                     nfc_f = TRUE;
1870                     continue;
1871                 }
1872 #endif
1873                 if (strcmp(long_option[i].name, "prefix=") == 0){
1874                     if (nkf_isgraph(p[0])){
1875                         for (i = 1; nkf_isgraph(p[i]); i++){
1876                             prefix_table[p[i]] = p[0];
1877                         }
1878                     }
1879                     continue;
1880                 }
1881             }
1882             continue;
1883         case 'b':           /* buffered mode */
1884             unbuf_f = FALSE;
1885             continue;
1886         case 'u':           /* non bufferd mode */
1887             unbuf_f = TRUE;
1888             continue;
1889         case 't':           /* transparent mode */
1890             if (*cp=='1') {
1891                 /* alias of -t */
1892                 nop_f = TRUE;
1893                 *cp++;
1894             } else if (*cp=='2') {
1895                 /*
1896                  * -t with put/get
1897                  *
1898                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1899                  *
1900                  */
1901                 nop_f = 2;
1902                 *cp++;
1903             } else
1904                 nop_f = TRUE;
1905             continue;
1906         case 'j':           /* JIS output */
1907         case 'n':
1908             output_conv = j_oconv;
1909             output_encoding = nkf_enc_from_index(ISO_2022_JP);\r
1910             continue;
1911         case 'e':           /* AT&T EUC output */
1912             output_conv = e_oconv;
1913             cp932inv_f = FALSE;
1914             output_encoding = nkf_enc_from_index(EUC_JP);\r
1915             continue;
1916         case 's':           /* SJIS output */
1917             output_conv = s_oconv;
1918             output_encoding = nkf_enc_from_index(SHIFT_JIS);\r
1919             continue;
1920         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1921             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1922             input_f = LATIN1_INPUT;
1923             continue;
1924         case 'i':           /* Kanji IN ESC-$-@/B */
1925             if (*cp=='@'||*cp=='B')
1926                 kanji_intro = *cp++;
1927             continue;
1928         case 'o':           /* ASCII IN ESC-(-J/B */
1929             if (*cp=='J'||*cp=='B'||*cp=='H')
1930                 ascii_intro = *cp++;
1931             continue;
1932         case 'h':
1933             /*
1934                 bit:1   katakana->hiragana
1935                 bit:2   hiragana->katakana
1936             */
1937             if ('9'>= *cp && *cp>='0')
1938                 hira_f |= (*cp++ -'0');
1939             else
1940                 hira_f |= 1;
1941             continue;
1942         case 'r':
1943             rot_f = TRUE;
1944             continue;
1945 #if defined(MSDOS) || defined(__OS2__)
1946         case 'T':
1947             binmode_f = FALSE;
1948             continue;
1949 #endif
1950 #ifndef PERL_XS
1951         case 'V':
1952             show_configuration();
1953             exit(1);
1954             break;
1955         case 'v':
1956             usage();
1957             exit(1);
1958             break;
1959 #endif
1960 #ifdef UTF8_OUTPUT_ENABLE
1961         case 'w':           /* UTF-8 output */
1962             if (cp[0] == '8') {
1963                 output_conv = w_oconv; cp++;
1964                 if (cp[0] == '0'){
1965                     cp++;
1966                     output_encoding = nkf_enc_from_index(UTF_8N);\r
1967                 } else {
1968                     output_bom_f = TRUE;
1969                     output_encoding = nkf_enc_from_index(UTF_8_BOM);\r
1970                 }
1971             } else {
1972                 int enc_idx;\r
1973                 if ('1'== cp[0] && '6'==cp[1]) {\r
1974                     output_conv = w_oconv16; cp+=2;
1975                     enc_idx = UTF_16;\r
1976                 } else if ('3'== cp[0] && '2'==cp[1]) {
1977                     output_conv = w_oconv32; cp+=2;
1978                     enc_idx = UTF_32;\r
1979                 } else {
1980                     output_conv = w_oconv;
1981                     output_encoding = nkf_enc_from_index(UTF_8);\r
1982                     continue;
1983                 }
1984                 if (cp[0]=='L') {
1985                     cp++;
1986                     output_endian = ENDIAN_LITTLE;
1987                 } else if (cp[0] == 'B') {
1988                     cp++;
1989                 } else {
1990                     output_encoding = nkf_enc_from_index(enc_idx);\r
1991                     continue;
1992                 }
1993                 if (cp[0] == '0'){
1994                     cp++;
1995                     enc_idx = enc_idx == UTF_16\r
1996                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1997                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1998                 } else {
1999                     output_bom_f = TRUE;
2000                     enc_idx = enc_idx == UTF_16\r
2001                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
2002                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
2003                 }
2004                 output_encoding = nkf_enc_from_index(enc_idx);\r
2005             }
2006             continue;
2007 #endif
2008 #ifdef UTF8_INPUT_ENABLE
2009         case 'W':           /* UTF input */
2010             if (cp[0] == '8') {
2011                 cp++;
2012                 input_f = UTF8_INPUT;
2013             }else{
2014                 if ('1'== cp[0] && '6'==cp[1]) {
2015                     cp += 2;
2016                     input_f = UTF16_INPUT;
2017                     input_endian = ENDIAN_BIG;
2018                 } else if ('3'== cp[0] && '2'==cp[1]) {
2019                     cp += 2;
2020                     input_f = UTF32_INPUT;
2021                     input_endian = ENDIAN_BIG;
2022                 } else {
2023                     input_f = UTF8_INPUT;
2024                     continue;
2025                 }
2026                 if (cp[0]=='L') {
2027                     cp++;
2028                     input_endian = ENDIAN_LITTLE;
2029                 } else if (cp[0] == 'B') {
2030                     cp++;
2031                 }
2032             }
2033             continue;
2034 #endif
2035         /* Input code assumption */
2036         case 'J':   /* JIS input */
2037             input_f = JIS_INPUT;
2038             continue;
2039         case 'E':   /* AT&T EUC input */
2040             input_f = EUC_INPUT;
2041             continue;
2042         case 'S':   /* MS Kanji input */
2043             input_f = SJIS_INPUT;
2044             continue;
2045         case 'Z':   /* Convert X0208 alphabet to asii */
2046             /* alpha_f
2047                bit:0   Convert JIS X 0208 Alphabet to ASCII
2048                bit:1   Convert Kankaku to one space
2049                bit:2   Convert Kankaku to two spaces
2050                bit:3   Convert HTML Entity
2051                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2052             */
2053             while ('0'<= *cp && *cp <='9') {
2054                 alpha_f |= 1 << (*cp++ - '0');
2055             }
2056             if (!alpha_f) alpha_f = 1;
2057             continue;
2058         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2059             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2060             /* accept  X0201
2061                     ESC-(-I     in JIS, EUC, MS Kanji
2062                     SI/SO       in JIS, EUC, MS Kanji
2063                     SSO         in EUC, JIS, not in MS Kanji
2064                     MS Kanji (0xa0-0xdf)
2065                output  X0201
2066                     ESC-(-I     in JIS (0x20-0x5f)
2067                     SSO         in EUC (0xa0-0xdf)
2068                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2069             */
2070             continue;
2071         case 'X':   /* Convert X0201 kana to X0208 */
2072             x0201_f = TRUE;
2073             continue;
2074         case 'F':   /* prserve new lines */
2075             fold_preserve_f = TRUE;
2076         case 'f':   /* folding -f60 or -f */
2077             fold_f = TRUE;
2078             fold_len = 0;
2079             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2080                 fold_len *= 10;
2081                 fold_len += *cp++ - '0';
2082             }
2083             if (!(0<fold_len && fold_len<BUFSIZ))
2084                 fold_len = DEFAULT_FOLD;
2085             if (*cp=='-') {
2086                 fold_margin = 0;
2087                 cp++;
2088                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2089                     fold_margin *= 10;
2090                     fold_margin += *cp++ - '0';
2091                 }
2092             }
2093             continue;
2094         case 'm':   /* MIME support */
2095             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2096             if (*cp=='B'||*cp=='Q') {
2097                 mime_decode_mode = *cp++;
2098                 mimebuf_f = FIXED_MIME;
2099             } else if (*cp=='N') {
2100                 mime_f = TRUE; cp++;
2101             } else if (*cp=='S') {
2102                 mime_f = STRICT_MIME; cp++;
2103             } else if (*cp=='0') {
2104                 mime_decode_f = FALSE;
2105                 mime_f = FALSE; cp++;
2106             }
2107             continue;
2108         case 'M':   /* MIME output */
2109             if (*cp=='B') {
2110                 mimeout_mode = 'B';
2111                 mimeout_f = FIXED_MIME; cp++;
2112             } else if (*cp=='Q') {
2113                 mimeout_mode = 'Q';
2114                 mimeout_f = FIXED_MIME; cp++;
2115             } else {
2116                 mimeout_f = TRUE;
2117             }
2118             continue;
2119         case 'B':   /* Broken JIS support */
2120             /*  bit:0   no ESC JIS
2121                 bit:1   allow any x on ESC-(-x or ESC-$-x
2122                 bit:2   reset to ascii on NL
2123             */
2124             if ('9'>= *cp && *cp>='0')
2125                 broken_f |= 1<<(*cp++ -'0');
2126             else
2127                 broken_f |= TRUE;
2128             continue;
2129 #ifndef PERL_XS
2130         case 'O':/* for Output file */
2131             file_out_f = TRUE;
2132             continue;
2133 #endif
2134         case 'c':/* add cr code */
2135             nlmode_f = CRLF;
2136             continue;
2137         case 'd':/* delete cr code */
2138             nlmode_f = LF;
2139             continue;
2140         case 'I':   /* ISO-2022-JP output */
2141             iso2022jp_f = TRUE;
2142             continue;
2143         case 'L':  /* line mode */
2144             if (*cp=='u') {         /* unix */
2145                 nlmode_f = LF; cp++;
2146             } else if (*cp=='m') { /* mac */
2147                 nlmode_f = CR; cp++;
2148             } else if (*cp=='w') { /* windows */
2149                 nlmode_f = CRLF; cp++;
2150             } else if (*cp=='0') { /* no conversion  */
2151                 nlmode_f = 0; cp++;
2152             }
2153             continue;
2154 #ifndef PERL_XS
2155         case 'g':
2156             if (*cp == '1') {
2157                 guess_f = 2;
2158                 cp++;
2159             } else if (*cp == '0') {
2160                 guess_f = 1;
2161                 cp++;
2162             } else {
2163                 guess_f = 1;
2164             }
2165             continue;
2166 #endif
2167         case SP:
2168         /* module muliple options in a string are allowed for Perl moudle  */
2169             while(*cp && *cp++!='-');
2170             continue;
2171         default:
2172             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2173             /* bogus option but ignored */
2174             continue;
2175         }
2176     }
2177 }
2178
2179 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2180 {
2181     if (iconv_func){
2182         struct input_code *p = input_code_list;
2183         while (p->name){
2184             if (iconv_func == p->iconv_func){
2185                 return p;
2186             }
2187             p++;
2188         }
2189     }
2190     return 0;
2191 }
2192
2193 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2194 {
2195 #ifdef INPUT_CODE_FIX
2196     if (f || !input_f)
2197 #endif
2198         if (estab_f != f){
2199             estab_f = f;
2200         }
2201
2202     if (iconv_func
2203 #ifdef INPUT_CODE_FIX
2204         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
2205 #endif
2206         ){
2207         iconv = iconv_func;
2208     }
2209 #ifdef CHECK_OPTION
2210     if (estab_f && iconv_for_check != iconv){
2211         struct input_code *p = find_inputcode_byfunc(iconv);
2212         if (p){
2213             set_input_codename(p->name);
2214             debug(p->name);
2215         }
2216         iconv_for_check = iconv;
2217     }
2218 #endif
2219 }
2220
/*
 * Score bits accumulated in input_code.score by set_code_score().
 * Each flag is one bit, doubling from SCORE_L2 up to SCORE_ERROR.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* character does not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score lookup tables, indexed by the low nibble of the first byte.
 * code_score() uses score_table_A0 when (c2 & 0x70) == 0x20 and
 * score_table_F0 when (c2 & 0x70) == 0x70.
 *
 * The element type is unsigned char on purpose: SCORE_ERROR is 128,
 * which does not fit in a signed char and would otherwise be stored as
 * a negative value and sign-extended when OR-ed into the score.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2245
2246 void set_code_score(struct input_code *ptr, nkf_char score)
2247 {
2248     if (ptr){
2249         ptr->score |= score;
2250     }
2251 }
2252
2253 void clr_code_score(struct input_code *ptr, nkf_char score)
2254 {
2255     if (ptr){
2256         ptr->score &= ~score;
2257     }
2258 }
2259
2260 void code_score(struct input_code *ptr)
2261 {
2262     nkf_char c2 = ptr->buf[0];
2263 #ifdef UTF8_OUTPUT_ENABLE
2264     nkf_char c1 = ptr->buf[1];
2265 #endif
2266     if (c2 < 0){
2267         set_code_score(ptr, SCORE_ERROR);
2268     }else if (c2 == SSO){
2269         set_code_score(ptr, SCORE_KANA);
2270     }else if (c2 == 0x8f){
2271         set_code_score(ptr, SCORE_X0212);
2272 #ifdef UTF8_OUTPUT_ENABLE
2273     }else if (!e2w_conv(c2, c1)){
2274         set_code_score(ptr, SCORE_NO_EXIST);
2275 #endif
2276     }else if ((c2 & 0x70) == 0x20){
2277         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2278     }else if ((c2 & 0x70) == 0x70){
2279         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2280     }else if ((c2 & 0x70) >= 0x50){
2281         set_code_score(ptr, SCORE_L2);
2282     }
2283 }
2284
2285 void status_disable(struct input_code *ptr)
2286 {
2287     ptr->stat = -1;
2288     ptr->buf[0] = -1;
2289     code_score(ptr);
2290     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2291 }
2292
2293 void status_push_ch(struct input_code *ptr, nkf_char c)
2294 {
2295     ptr->buf[ptr->index++] = c;
2296 }
2297
2298 void status_clear(struct input_code *ptr)
2299 {
2300     ptr->stat = 0;
2301     ptr->index = 0;
2302 }
2303
2304 void status_reset(struct input_code *ptr)
2305 {
2306     status_clear(ptr);
2307     ptr->score = SCORE_INIT;
2308 }
2309
2310 void status_reinit(struct input_code *ptr)
2311 {
2312     status_reset(ptr);
2313     ptr->_file_stat = 0;
2314 }
2315
2316 void status_check(struct input_code *ptr, nkf_char c)
2317 {
2318     if (c <= DEL && estab_f){
2319         status_reset(ptr);
2320     }
2321 }
2322
2323 void s_status(struct input_code *ptr, nkf_char c)
2324 {
2325     switch(ptr->stat){
2326       case -1:
2327           status_check(ptr, c);
2328           break;
2329       case 0:
2330           if (c <= DEL){
2331               break;
2332 #ifdef NUMCHAR_OPTION
2333           }else if (is_unicode_capsule(c)){
2334               break;
2335 #endif
2336           }else if (0xa1 <= c && c <= 0xdf){
2337               status_push_ch(ptr, SSO);
2338               status_push_ch(ptr, c);
2339               code_score(ptr);
2340               status_clear(ptr);
2341           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2342               ptr->stat = 1;
2343               status_push_ch(ptr, c);
2344           }else if (0xed <= c && c <= 0xee){
2345               ptr->stat = 3;
2346               status_push_ch(ptr, c);
2347 #ifdef SHIFTJIS_CP932
2348           }else if (is_ibmext_in_sjis(c)){
2349               ptr->stat = 2;
2350               status_push_ch(ptr, c);
2351 #endif /* SHIFTJIS_CP932 */
2352 #ifdef X0212_ENABLE
2353           }else if (0xf0 <= c && c <= 0xfc){
2354               ptr->stat = 1;
2355               status_push_ch(ptr, c);
2356 #endif /* X0212_ENABLE */
2357           }else{
2358               status_disable(ptr);
2359           }
2360           break;
2361       case 1:
2362           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2363               status_push_ch(ptr, c);
2364               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2365               code_score(ptr);
2366               status_clear(ptr);
2367           }else{
2368               status_disable(ptr);
2369           }
2370           break;
2371       case 2:
2372 #ifdef SHIFTJIS_CP932
2373         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2374             status_push_ch(ptr, c);
2375             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2376                 set_code_score(ptr, SCORE_CP932);
2377                 status_clear(ptr);
2378                 break;
2379             }
2380         }
2381 #endif /* SHIFTJIS_CP932 */
2382         status_disable(ptr);
2383           break;
2384       case 3:
2385           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2386               status_push_ch(ptr, c);
2387               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2388             set_code_score(ptr, SCORE_CP932);
2389             status_clear(ptr);
2390           }else{
2391               status_disable(ptr);
2392           }
2393           break;
2394     }
2395 }
2396
2397 void e_status(struct input_code *ptr, nkf_char c)
2398 {
2399     switch (ptr->stat){
2400       case -1:
2401           status_check(ptr, c);
2402           break;
2403       case 0:
2404           if (c <= DEL){
2405               break;
2406 #ifdef NUMCHAR_OPTION
2407           }else if (is_unicode_capsule(c)){
2408               break;
2409 #endif
2410           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2411               ptr->stat = 1;
2412               status_push_ch(ptr, c);
2413 #ifdef X0212_ENABLE
2414           }else if (0x8f == c){
2415               ptr->stat = 2;
2416               status_push_ch(ptr, c);
2417 #endif /* X0212_ENABLE */
2418           }else{
2419               status_disable(ptr);
2420           }
2421           break;
2422       case 1:
2423           if (0xa1 <= c && c <= 0xfe){
2424               status_push_ch(ptr, c);
2425               code_score(ptr);
2426               status_clear(ptr);
2427           }else{
2428               status_disable(ptr);
2429           }
2430           break;
2431 #ifdef X0212_ENABLE
2432       case 2:
2433           if (0xa1 <= c && c <= 0xfe){
2434               ptr->stat = 1;
2435               status_push_ch(ptr, c);
2436           }else{
2437               status_disable(ptr);
2438           }
2439 #endif /* X0212_ENABLE */
2440     }
2441 }
2442
#ifdef UTF8_INPUT_ENABLE
/*
 * Per-byte state machine for the UTF-8 candidate.
 *
 * ptr->stat values used here:
 *   -1  candidate disabled (see status_check())
 *    0  between characters
 *    1  lead byte 0xc0-0xdf seen
 *    2  lead byte 0xe0-0xef seen
 *    3  lead byte 0xf0-0xf4 seen; the sequence is consumed but not
 *       scored
 *
 * In states 1 and 2 the sequence is complete once more bytes are
 * buffered than the state number; it is then converted with
 * w2e_conv() and scored, except that the byte sequence EF BB BF is
 * not scored.  Any byte that does not fit disables the candidate.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    const int is_cont = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL)
              break;
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c))
              break;
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (!is_cont){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index <= ptr->stat)
              break;                    /* sequence not complete yet */
          {
              /* must be evaluated before w2e_conv() overwrites buf */
              const int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                                  && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!is_bom)
                  code_score(ptr);
              status_clear(ptr);
          }
          break;

      case 3:
          if (!is_cont){
              status_disable(ptr);
          }else if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              status_clear(ptr);
          }
          break;
    }
}
#endif
2502
2503 void code_status(nkf_char c)
2504 {
2505     int action_flag = 1;
2506     struct input_code *result = 0;
2507     struct input_code *p = input_code_list;
2508     while (p->name){
2509         if (!p->status_func) {
2510             ++p;
2511             continue;
2512         }
2513         if (!p->status_func)
2514             continue;
2515         (p->status_func)(p, c);
2516         if (p->stat > 0){
2517             action_flag = 0;
2518         }else if(p->stat == 0){
2519             if (result){
2520                 action_flag = 0;
2521             }else{
2522                 result = p;
2523             }
2524         }
2525         ++p;
2526     }
2527
2528     if (action_flag){
2529         if (result && !estab_f){
2530             set_iconv(TRUE, result->iconv_func);
2531         }else if (c <= DEL){
2532             struct input_code *ptr = input_code_list;
2533             while (ptr->name){
2534                 status_reset(ptr);
2535                 ++ptr;
2536             }
2537         }
2538     }
2539 }
2540
2541 #ifndef WIN32DLL
2542 nkf_char std_getc(FILE *f)
2543 {
2544     if (std_gc_ndx){
2545         return std_gc_buf[--std_gc_ndx];
2546     }
2547     return getc(f);
2548 }
2549 #endif /*WIN32DLL*/
2550
2551 nkf_char std_ungetc(nkf_char c, FILE *f)
2552 {
2553     if (std_gc_ndx == STD_GC_BUFSIZE){
2554         return EOF;
2555     }
2556     std_gc_buf[std_gc_ndx++] = c;
2557     return c;
2558 }
2559
2560 #ifndef WIN32DLL
2561 void std_putc(nkf_char c)
2562 {
2563     if(c!=EOF)
2564       putchar(c);
2565 }
2566 #endif /*WIN32DLL*/
2567
2568 #if !defined(PERL_XS) && !defined(WIN32DLL)
2569 nkf_char noconvert(FILE *f)
2570 {
2571     nkf_char    c;
2572
2573     if (nop_f == 2)
2574         module_connection();
2575     while ((c = (*i_getc)(f)) != EOF)
2576       (*o_putc)(c);
2577     (*o_putc)(EOF);
2578     return 1;
2579 }
2580 #endif
2581
2582 void module_connection(void)
2583 {
2584     oconv = output_conv;
2585     o_putc = std_putc;
2586
2587     /* replace continucation module, from output side */
2588
2589     /* output redicrection */
2590 #ifdef CHECK_OPTION
2591     if (noout_f || guess_f){
2592         o_putc = no_putc;
2593     }
2594 #endif
2595     if (mimeout_f) {
2596         o_mputc = o_putc;
2597         o_putc = mime_putc;
2598         if (mimeout_f == TRUE) {
2599             o_base64conv = oconv; oconv = base64_conv;
2600         }
2601         /* base64_count = 0; */
2602     }
2603
2604     if (nlmode_f || guess_f) {
2605         o_nlconv = oconv; oconv = nl_conv;
2606     }
2607     if (rot_f) {
2608         o_rot_conv = oconv; oconv = rot_conv;
2609     }
2610     if (iso2022jp_f) {
2611         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2612     }
2613     if (hira_f) {
2614         o_hira_conv = oconv; oconv = hira_conv;
2615     }
2616     if (fold_f) {
2617         o_fconv = oconv; oconv = fold_conv;
2618         f_line = 0;
2619     }
2620     if (alpha_f || x0201_f) {
2621         o_zconv = oconv; oconv = z_conv;
2622     }
2623
2624     i_getc = std_getc;
2625     i_ungetc = std_ungetc;
2626     /* input redicrection */
2627 #ifdef INPUT_OPTION
2628     if (cap_f){
2629         i_cgetc = i_getc; i_getc = cap_getc;
2630         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2631     }
2632     if (url_f){
2633         i_ugetc = i_getc; i_getc = url_getc;
2634         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2635     }
2636 #endif
2637 #ifdef NUMCHAR_OPTION
2638     if (numchar_f){
2639         i_ngetc = i_getc; i_getc = numchar_getc;
2640         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2641     }
2642 #endif
2643 #ifdef UNICODE_NORMALIZATION
2644     if (nfc_f && input_f == UTF8_INPUT){
2645         i_nfc_getc = i_getc; i_getc = nfc_getc;
2646         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2647     }
2648 #endif
2649     if (mime_f && mimebuf_f==FIXED_MIME) {
2650         i_mgetc = i_getc; i_getc = mime_getc;
2651         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2652     }
2653     if (broken_f & 1) {
2654         i_bgetc = i_getc; i_getc = broken_getc;
2655         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2656     }
2657     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2658         set_iconv(-TRUE, e_iconv);
2659     } else if (input_f == SJIS_INPUT) {
2660         set_iconv(-TRUE, s_iconv);
2661 #ifdef UTF8_INPUT_ENABLE
2662     } else if (input_f == UTF8_INPUT) {
2663         set_iconv(-TRUE, w_iconv);
2664     } else if (input_f == UTF16_INPUT) {
2665         set_iconv(-TRUE, w_iconv16);
2666     } else if (input_f == UTF32_INPUT) {
2667         set_iconv(-TRUE, w_iconv32);
2668 #endif
2669     } else {
2670         set_iconv(FALSE, e_iconv);
2671     }
2672
2673     {
2674         struct input_code *p = input_code_list;
2675         while (p->name){
2676             status_reinit(p++);
2677         }
2678     }
2679 }
2680
2681 /*
2682  * Check and Ignore BOM
2683  */
2684 void check_bom(FILE *f)
2685 {
2686     int c2;
2687     switch(c2 = (*i_getc)(f)){
2688     case 0x00:
2689         if((c2 = (*i_getc)(f)) == 0x00){
2690             if((c2 = (*i_getc)(f)) == 0xFE){
2691                 if((c2 = (*i_getc)(f)) == 0xFF){
2692                     if(!input_f){
2693                         set_iconv(TRUE, w_iconv32);
2694                     }
2695                     if (iconv == w_iconv32) {
2696                         input_endian = ENDIAN_BIG;
2697                         return;
2698                     }
2699                     (*i_ungetc)(0xFF,f);
2700                 }else (*i_ungetc)(c2,f);
2701                 (*i_ungetc)(0xFE,f);
2702             }else if(c2 == 0xFF){
2703                 if((c2 = (*i_getc)(f)) == 0xFE){
2704                     if(!input_f){
2705                         set_iconv(TRUE, w_iconv32);
2706                     }
2707                     if (iconv == w_iconv32) {
2708                         input_endian = ENDIAN_2143;
2709                         return;
2710                     }
2711                     (*i_ungetc)(0xFF,f);
2712                 }else (*i_ungetc)(c2,f);
2713                 (*i_ungetc)(0xFF,f);
2714             }else (*i_ungetc)(c2,f);
2715             (*i_ungetc)(0x00,f);
2716         }else (*i_ungetc)(c2,f);
2717         (*i_ungetc)(0x00,f);
2718         break;
2719     case 0xEF:
2720         if((c2 = (*i_getc)(f)) == 0xBB){
2721             if((c2 = (*i_getc)(f)) == 0xBF){
2722                 if(!input_f){
2723                     set_iconv(TRUE, w_iconv);
2724                 }
2725                 if (iconv == w_iconv) {
2726                     return;
2727                 }
2728                 (*i_ungetc)(0xBF,f);
2729             }else (*i_ungetc)(c2,f);
2730             (*i_ungetc)(0xBB,f);
2731         }else (*i_ungetc)(c2,f);
2732         (*i_ungetc)(0xEF,f);
2733         break;
2734     case 0xFE:
2735         if((c2 = (*i_getc)(f)) == 0xFF){
2736             if((c2 = (*i_getc)(f)) == 0x00){
2737                 if((c2 = (*i_getc)(f)) == 0x00){
2738                     if(!input_f){
2739                         set_iconv(TRUE, w_iconv32);
2740                     }
2741                     if (iconv == w_iconv32) {
2742                         input_endian = ENDIAN_3412;
2743                         return;
2744                     }
2745                     (*i_ungetc)(0x00,f);
2746                 }else (*i_ungetc)(c2,f);
2747                 (*i_ungetc)(0x00,f);
2748             }else (*i_ungetc)(c2,f);
2749             if(!input_f){
2750                 set_iconv(TRUE, w_iconv16);
2751             }
2752             if (iconv == w_iconv16) {
2753                 input_endian = ENDIAN_BIG;
2754                 return;
2755             }
2756             (*i_ungetc)(0xFF,f);
2757         }else (*i_ungetc)(c2,f);
2758         (*i_ungetc)(0xFE,f);
2759         break;
2760     case 0xFF:
2761         if((c2 = (*i_getc)(f)) == 0xFE){
2762             if((c2 = (*i_getc)(f)) == 0x00){
2763                 if((c2 = (*i_getc)(f)) == 0x00){
2764                     if(!input_f){
2765                         set_iconv(TRUE, w_iconv32);
2766                     }
2767                     if (iconv == w_iconv32) {
2768                         input_endian = ENDIAN_LITTLE;
2769                         return;
2770                     }
2771                     (*i_ungetc)(0x00,f);
2772                 }else (*i_ungetc)(c2,f);
2773                 (*i_ungetc)(0x00,f);
2774             }else (*i_ungetc)(c2,f);
2775             if(!input_f){
2776                 set_iconv(TRUE, w_iconv16);
2777             }
2778             if (iconv == w_iconv16) {
2779                 input_endian = ENDIAN_LITTLE;
2780                 return;
2781             }
2782             (*i_ungetc)(0xFE,f);
2783         }else (*i_ungetc)(c2,f);
2784         (*i_ungetc)(0xFF,f);
2785         break;
2786     default:
2787         (*i_ungetc)(c2,f);
2788         break;
2789     }
2790 }
2791
2792 /*
2793    Conversion main loop. Code detection only.
2794  */
2795
2796 nkf_char kanji_convert(FILE *f)
2797 {
2798     nkf_char    c3, c2=0, c1, c0=0;
2799     int is_8bit = FALSE;
2800
2801     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2802 #ifdef UTF8_INPUT_ENABLE
2803        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2804 #endif
2805       ){
2806         is_8bit = TRUE;
2807     }
2808
2809     input_mode = ASCII;
2810     output_mode = ASCII;
2811     shift_mode = FALSE;
2812
2813 #define NEXT continue      /* no output, get next */
2814 #define SEND ;             /* output c1 and c2, get next */
2815 #define LAST break         /* end of loop, go closing  */
2816
2817     module_connection();
2818     check_bom(f);
2819
2820     while ((c1 = (*i_getc)(f)) != EOF) {
2821 #ifdef INPUT_CODE_FIX
2822         if (!input_f)
2823 #endif
2824             code_status(c1);
2825         if (c2) {
2826             /* second byte */
2827             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2828                 /* in case of 8th bit is on */
2829                 if (!estab_f&&!mime_decode_mode) {
2830                     /* in case of not established yet */
2831                     /* It is still ambiguious */
2832                     if (h_conv(f, c2, c1)==EOF)
2833                         LAST;
2834                     else
2835                         c2 = 0;
2836                     NEXT;
2837                 } else {
2838                     /* in case of already established */
2839                     if (c1 < AT) {
2840                         /* ignore bogus code and not CP5022x UCD */
2841                         c2 = 0;
2842                         NEXT;
2843                     } else {
2844                         SEND;
2845                     }
2846                 }
2847             } else
2848                 /* second byte, 7 bit code */
2849                 /* it might be kanji shitfted */
2850                 if ((c1 == DEL) || (c1 <= SP)) {
2851                     /* ignore bogus first code */
2852                     c2 = 0;
2853                     NEXT;
2854                 } else
2855                     SEND;
2856         } else {
2857             /* first byte */
2858 #ifdef UTF8_INPUT_ENABLE
2859             if (iconv == w_iconv16) {
2860                 if (input_endian == ENDIAN_BIG) {
2861                     c2 = c1;
2862                     if ((c1 = (*i_getc)(f)) != EOF) {
2863                         if (0xD8 <= c2 && c2 <= 0xDB) {
2864                             if ((c0 = (*i_getc)(f)) != EOF) {
2865                                 c0 <<= 8;
2866                                 if ((c3 = (*i_getc)(f)) != EOF) {
2867                                     c0 |= c3;
2868                                 } else c2 = EOF;
2869                             } else c2 = EOF;
2870                         }
2871                     } else c2 = EOF;
2872                 } else {
2873                     if ((c2 = (*i_getc)(f)) != EOF) {
2874                         if (0xD8 <= c2 && c2 <= 0xDB) {
2875                             if ((c3 = (*i_getc)(f)) != EOF) {
2876                                 if ((c0 = (*i_getc)(f)) != EOF) {
2877                                     c0 <<= 8;
2878                                     c0 |= c3;
2879                                 } else c2 = EOF;
2880                             } else c2 = EOF;
2881                         }
2882                     } else c2 = EOF;
2883                 }
2884                 SEND;
2885             } else if(iconv == w_iconv32){
2886                 int c3 = c1;
2887                 if((c2 = (*i_getc)(f)) != EOF &&
2888                    (c1 = (*i_getc)(f)) != EOF &&
2889                    (c0 = (*i_getc)(f)) != EOF){
2890                     switch(input_endian){
2891                     case ENDIAN_BIG:
2892                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2893                         break;
2894                     case ENDIAN_LITTLE:
2895                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2896                         break;
2897                     case ENDIAN_2143:
2898                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2899                         break;
2900                     case ENDIAN_3412:
2901                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2902                         break;
2903                     }
2904                     c2 = 0;
2905                 }else{
2906                     c2 = EOF;
2907                 }
2908                 SEND;
2909             } else
2910 #endif
2911 #ifdef NUMCHAR_OPTION
2912             if (is_unicode_capsule(c1)){
2913                 SEND;
2914             } else
2915 #endif
2916             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2917                 /* 8 bit code */
2918                 if (!estab_f && !iso8859_f) {
2919                     /* not established yet */
2920                     c2 = c1;
2921                     NEXT;
2922                 } else { /* estab_f==TRUE */
2923                     if (iso8859_f) {
2924                         c2 = ISO_8859_1;
2925                         c1 &= 0x7f;
2926                         SEND;
2927                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2928                         /* SJIS X0201 Case... */
2929                         if (iso2022jp_f && !x0201_f) {
2930                             (*oconv)(GETA1, GETA2);
2931                             NEXT;
2932                         } else {
2933                             c2 = JIS_X_0201;
2934                             c1 &= 0x7f;
2935                             SEND;
2936                         }
2937                     } else if (c1==SSO && iconv != s_iconv) {
2938                         /* EUC X0201 Case */
2939                         c1 = (*i_getc)(f);  /* skip SSO */
2940                         code_status(c1);
2941                         if (SSP<=c1 && c1<0xe0) {
2942                             if (iso2022jp_f && !x0201_f) {
2943                                 (*oconv)(GETA1, GETA2);
2944                                 NEXT;
2945                             } else {
2946                                 c2 = JIS_X_0201;
2947                                 c1 &= 0x7f;
2948                                 SEND;
2949                             }
2950                         } else  { /* bogus code, skip SSO and one byte */
2951                             NEXT;
2952                         }
2953                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2954                                (c1 == 0xFD || c1 == 0xFE)) {
2955                         /* CP10001 */
2956                         c2 = JIS_X_0201;
2957                         c1 &= 0x7f;
2958                         SEND;
2959                     } else {
2960                        /* already established */
2961                        c2 = c1;
2962                        NEXT;
2963                     }
2964                 }
2965             } else if ((c1 > SP) && (c1 != DEL)) {
2966                 /* in case of Roman characters */
2967                 if (shift_mode) {
2968                     /* output 1 shifted byte */
2969                     if (iso8859_f) {
2970                         c2 = ISO_8859_1;
2971                         SEND;
2972                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2973                       /* output 1 shifted byte */
2974                         if (iso2022jp_f && !x0201_f) {
2975                             (*oconv)(GETA1, GETA2);
2976                             NEXT;
2977                         } else {
2978                             c2 = JIS_X_0201;
2979                             SEND;
2980                         }
2981                     } else {
2982                         /* look like bogus code */
2983                         NEXT;
2984                     }
2985                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2986                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2987                     /* in case of Kanji shifted */
2988                     c2 = c1;
2989                     NEXT;
2990                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2991                     /* Check MIME code */
2992                     if ((c1 = (*i_getc)(f)) == EOF) {
2993                         (*oconv)(0, '=');
2994                         LAST;
2995                     } else if (c1 == '?') {
2996                         /* =? is mime conversion start sequence */
2997                         if(mime_f == STRICT_MIME) {
2998                             /* check in real detail */
2999                             if (mime_begin_strict(f) == EOF)
3000                                 LAST;
3001                             else
3002                                 NEXT;
3003                         } else if (mime_begin(f) == EOF)
3004                             LAST;
3005                         else
3006                             NEXT;
3007                     } else {
3008                         (*oconv)(0, '=');
3009                         (*i_ungetc)(c1,f);
3010                         NEXT;
3011                     }
3012                 } else {
3013                     /* normal ASCII code */
3014                     SEND;
3015                 }
3016             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
3017                 shift_mode = FALSE;
3018                 NEXT;
3019             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
3020                 shift_mode = TRUE;
3021                 NEXT;
3022             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
3023                 if ((c1 = (*i_getc)(f)) == EOF) {
3024                     /*  (*oconv)(0, ESC); don't send bogus code */
3025                     LAST;
3026                 } else if (c1 == '$') {
3027                     if ((c1 = (*i_getc)(f)) == EOF) {
3028                         /*
3029                         (*oconv)(0, ESC); don't send bogus code
3030                         (*oconv)(0, '$'); */
3031                         LAST;
3032                     } else if (c1 == '@'|| c1 == 'B') {
3033                         /* This is kanji introduction */
3034                         input_mode = JIS_X_0208;
3035                         shift_mode = FALSE;
3036                         set_input_codename("ISO-2022-JP");
3037 #ifdef CHECK_OPTION
3038                         debug("ISO-2022-JP");
3039 #endif
3040                         NEXT;
3041                     } else if (c1 == '(') {
3042                         if ((c1 = (*i_getc)(f)) == EOF) {
3043                             /* don't send bogus code
3044                             (*oconv)(0, ESC);
3045                             (*oconv)(0, '$');
3046                             (*oconv)(0, '(');
3047                                 */
3048                             LAST;
3049                         } else if (c1 == '@'|| c1 == 'B') {
3050                             /* This is kanji introduction */
3051                             input_mode = JIS_X_0208;
3052                             shift_mode = FALSE;
3053                             NEXT;
3054 #ifdef X0212_ENABLE
3055                         } else if (c1 == 'D'){
3056                             input_mode = JIS_X_0212;
3057                             shift_mode = FALSE;
3058                             NEXT;
3059 #endif /* X0212_ENABLE */
3060                         } else if (c1 == 0x4F){
3061                             input_mode = JIS_X_0213_1;
3062                             shift_mode = FALSE;
3063                             NEXT;
3064                         } else if (c1 == 0x50){
3065                             input_mode = JIS_X_0213_2;
3066                             shift_mode = FALSE;
3067                             NEXT;
3068                         } else {
3069                             /* could be some special code */
3070                             (*oconv)(0, ESC);
3071                             (*oconv)(0, '$');
3072                             (*oconv)(0, '(');
3073                             (*oconv)(0, c1);
3074                             NEXT;
3075                         }
3076                     } else if (broken_f&0x2) {
3077                         /* accept any ESC-(-x as broken code ... */
3078                         input_mode = JIS_X_0208;
3079                         shift_mode = FALSE;
3080                         NEXT;
3081                     } else {
3082                         (*oconv)(0, ESC);
3083                         (*oconv)(0, '$');
3084                         (*oconv)(0, c1);
3085                         NEXT;
3086                     }
3087                 } else if (c1 == '(') {
3088                     if ((c1 = (*i_getc)(f)) == EOF) {
3089                         /* don't send bogus code
3090                         (*oconv)(0, ESC);
3091                         (*oconv)(0, '('); */
3092                         LAST;
3093                     } else {
3094                         if (c1 == 'I') {
3095                             /* This is X0201 kana introduction */
3096                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3097                             NEXT;
3098                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3099                             /* This is X0208 kanji introduction */
3100                             input_mode = ASCII; shift_mode = FALSE;
3101                             NEXT;
3102                         } else if (broken_f&0x2) {
3103                             input_mode = ASCII; shift_mode = FALSE;
3104                             NEXT;
3105                         } else {
3106                             (*oconv)(0, ESC);
3107                             (*oconv)(0, '(');
3108                             /* maintain various input_mode here */
3109                             SEND;
3110                         }
3111                     }
3112                } else if ( c1 == 'N' || c1 == 'n'){
3113                    /* SS2 */
3114                    c3 = (*i_getc)(f);  /* skip SS2 */
3115                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3116                        c1 = c3;
3117                        c2 = JIS_X_0201;
3118                        SEND;
3119                    }else{
3120                        (*i_ungetc)(c3, f);
3121                        /* lonely ESC  */
3122                        (*oconv)(0, ESC);
3123                        SEND;
3124                    }
3125                 } else {
3126                     /* lonely ESC  */
3127                     (*oconv)(0, ESC);
3128                     SEND;
3129                 }
3130             } else if (c1 == ESC && iconv == s_iconv) {
3131                 /* ESC in Shift_JIS */
3132                 if ((c1 = (*i_getc)(f)) == EOF) {
3133                     /*  (*oconv)(0, ESC); don't send bogus code */
3134                     LAST;
3135                 } else if (c1 == '$') {
3136                     /* J-PHONE emoji */
3137                     if ((c1 = (*i_getc)(f)) == EOF) {
3138                         /*
3139                            (*oconv)(0, ESC); don't send bogus code
3140                            (*oconv)(0, '$'); */
3141                         LAST;
3142                     } else {
3143                         if (('E' <= c1 && c1 <= 'G') ||
3144                             ('O' <= c1 && c1 <= 'Q')) {
3145                             /*
3146                                NUM : 0 1 2 3 4 5
3147                                BYTE: G E F O P Q
3148                                C%7 : 1 6 0 2 3 4
3149                                C%7 : 0 1 2 3 4 5 6
3150                                NUM : 2 0 3 4 5 X 1
3151                              */
3152                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3153                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3154                             while ((c1 = (*i_getc)(f)) != EOF) {
3155                                 if (SP <= c1 && c1 <= 'z') {
3156                                     (*oconv)(0, c1 + c0);
3157                                 } else break; /* c1 == SO */
3158                             }
3159                         }
3160                     }
3161                     if (c1 == EOF) LAST;
3162                     NEXT;
3163                 } else {
3164                     /* lonely ESC  */
3165                     (*oconv)(0, ESC);
3166                     SEND;
3167                 }
3168             } else if (c1 == LF || c1 == CR) {
3169                 if (broken_f&4) {
3170                     input_mode = ASCII; set_iconv(FALSE, 0);
3171                     SEND;
3172                 } else if (mime_decode_f && !mime_decode_mode){
3173                     if (c1 == LF) {
3174                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3175                             i_ungetc(SP,f);
3176                             continue;
3177                         } else {
3178                             i_ungetc(c1,f);
3179                         }
3180                         c1 = LF;
3181                         SEND;
3182                     } else  { /* if (c1 == CR)*/
3183                         if ((c1=(*i_getc)(f))!=EOF) {
3184                             if (c1==SP) {
3185                                 i_ungetc(SP,f);
3186                                 continue;
3187                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3188                                 i_ungetc(SP,f);
3189                                 continue;
3190                             } else {
3191                                 i_ungetc(c1,f);
3192                             }
3193                             i_ungetc(LF,f);
3194                         } else {
3195                             i_ungetc(c1,f);
3196                         }
3197                         c1 = CR;
3198                         SEND;
3199                     }
3200                 }
3201             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3202                 /* CP5022x */
3203                 c2 = c1;
3204                 NEXT;
3205             } else
3206                 SEND;
3207         }
3208         /* send: */
3209         switch(input_mode){
3210         case ASCII:
3211             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3212             case -2:
3213                 /* 4 bytes UTF-8 */
3214                 if ((c0 = (*i_getc)(f)) != EOF) {
3215                     code_status(c0);
3216                     c0 <<= 8;
3217                     if ((c3 = (*i_getc)(f)) != EOF) {
3218                         code_status(c3);
3219                         (*iconv)(c2, c1, c0|c3);
3220                     }
3221                 }
3222                 break;
3223             case -1:
3224                 /* 3 bytes EUC or UTF-8 */
3225                 if ((c0 = (*i_getc)(f)) != EOF) {
3226                     code_status(c0);
3227                     (*iconv)(c2, c1, c0);
3228                 }
3229                 break;
3230             }
3231             break;
3232         case JIS_X_0208:
3233         case JIS_X_0213_1:
3234             if (ms_ucs_map_f &&
3235                 0x7F <= c2 && c2 <= 0x92 &&
3236                 0x21 <= c1 && c1 <= 0x7E) {
3237                 /* CP932 UDC */
3238                 if(c1 == 0x7F) return 0;
3239                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3240                 c2 = 0;
3241             }
3242             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3243             break;
3244 #ifdef X0212_ENABLE
3245         case JIS_X_0212:
3246             (*oconv)(PREFIX_EUCG3 | c2, c1);
3247             break;
3248 #endif /* X0212_ENABLE */
3249         case JIS_X_0213_2:
3250             (*oconv)(PREFIX_EUCG3 | c2, c1);
3251             break;
3252         default:
3253             (*oconv)(input_mode, c1);  /* other special case */
3254         }
3255
3256         c2 = 0;
3257         c0 = 0;
3258         continue;
3259         /* goto next_word */
3260     }
3261
3262     /* epilogue */
3263     (*iconv)(EOF, 0, 0);
3264     if (!input_codename)
3265     {
3266         if (is_8bit) {
3267             struct input_code *p = input_code_list;
3268             struct input_code *result = p;
3269             while (p->name){
3270                 if (p->score < result->score) result = p;
3271                 ++p;
3272             }
3273             set_input_codename(result->name);
3274 #ifdef CHECK_OPTION
3275             debug(result->name);
3276 #endif
3277         }
3278     }
3279     return 1;
3280 }
3281
3282 nkf_char
3283 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3284 {
3285     nkf_char ret, c3, c0;
3286     int hold_index;
3287
3288
3289     /** it must NOT be in the kanji shifte sequence      */
3290     /** it must NOT be written in JIS7                   */
3291     /** and it must be after 2 byte 8bit code            */
3292
3293     hold_count = 0;
3294     push_hold_buf(c2);
3295     push_hold_buf(c1);
3296
3297     while ((c1 = (*i_getc)(f)) != EOF) {
3298         if (c1 == ESC){
3299             (*i_ungetc)(c1,f);
3300             break;
3301         }
3302         code_status(c1);
3303         if (push_hold_buf(c1) == EOF || estab_f){
3304             break;
3305         }
3306     }
3307
3308     if (!estab_f){
3309         struct input_code *p = input_code_list;
3310         struct input_code *result = p;
3311         if (c1 == EOF){
3312             code_status(c1);
3313         }
3314         while (p->name){
3315             if (p->status_func && p->score < result->score){
3316                 result = p;
3317             }
3318             ++p;
3319         }
3320         set_iconv(TRUE, result->iconv_func);
3321     }
3322
3323
3324     /** now,
3325      ** 1) EOF is detected, or
3326      ** 2) Code is established, or
3327      ** 3) Buffer is FULL (but last word is pushed)
3328      **
3329      ** in 1) and 3) cases, we continue to use
3330      ** Kanji codes by oconv and leave estab_f unchanged.
3331      **/
3332
3333     ret = c1;
3334     hold_index = 0;
3335     while (hold_index < hold_count){
3336         c2 = hold_buf[hold_index++];
3337         if (c2 <= DEL
3338 #ifdef NUMCHAR_OPTION
3339             || is_unicode_capsule(c2)
3340 #endif
3341             ){
3342             (*iconv)(0, c2, 0);
3343             continue;
3344         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3345             (*iconv)(JIS_X_0201, c2, 0);
3346             continue;
3347         }
3348         if (hold_index < hold_count){
3349             c1 = hold_buf[hold_index++];
3350         }else{
3351             c1 = (*i_getc)(f);
3352             if (c1 == EOF){
3353                 c3 = EOF;
3354                 break;
3355             }
3356             code_status(c1);
3357         }
3358         c0 = 0;
3359         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3360         case -2:
3361             /* 4 bytes UTF-8 */
3362             if (hold_index < hold_count){
3363                 c0 = hold_buf[hold_index++];
3364             } else if ((c0 = (*i_getc)(f)) == EOF) {
3365                 ret = EOF;
3366                 break;
3367             } else {
3368                 code_status(c0);
3369                 c0 <<= 8;
3370                 if (hold_index < hold_count){
3371                     c3 = hold_buf[hold_index++];
3372                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3373                     c0 = ret = EOF;
3374                     break;
3375                 } else {
3376                     code_status(c3);
3377                     (*iconv)(c2, c1, c0|c3);
3378                 }
3379             }
3380             break;
3381         case -1:
3382             /* 3 bytes EUC or UTF-8 */
3383             if (hold_index < hold_count){
3384                 c0 = hold_buf[hold_index++];
3385             } else if ((c0 = (*i_getc)(f)) == EOF) {
3386                 ret = EOF;
3387                 break;
3388             } else {
3389                 code_status(c0);
3390             }
3391             (*iconv)(c2, c1, c0);
3392             break;
3393         }
3394         if (c0 == EOF) break;
3395     }
3396     return ret;
3397 }
3398
3399 nkf_char push_hold_buf(nkf_char c2)
3400 {
3401     if (hold_count >= HOLD_SIZE*2)
3402         return (EOF);
3403     hold_buf[hold_count++] = (unsigned char)c2;
3404     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3405 }
3406
3407 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3408 {
3409 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3410     nkf_char val;
3411 #endif
3412     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3413 #ifdef SHIFTJIS_CP932
3414     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3415         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3416         if (val){
3417             c2 = val >> 8;
3418             c1 = val & 0xff;
3419         }
3420     }
3421     if (cp932inv_f
3422         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3423         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3424         if (c){
3425             c2 = c >> 8;
3426             c1 = c & 0xff;
3427         }
3428     }
3429 #endif /* SHIFTJIS_CP932 */
3430 #ifdef X0212_ENABLE
3431     if (!x0213_f && is_ibmext_in_sjis(c2)){
3432         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3433         if (val){
3434             if (val > 0x7FFF){
3435                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3436                 c1 = val & 0xff;
3437             }else{
3438                 c2 = val >> 8;
3439                 c1 = val & 0xff;
3440             }
3441             if (p2) *p2 = c2;
3442             if (p1) *p1 = c1;
3443             return 0;
3444         }
3445     }
3446 #endif
3447     if(c2 >= 0x80){
3448         if(x0213_f && c2 >= 0xF0){
3449             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3450                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3451             }else{ /* 78<=k<=94 */
3452                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3453                 if (0x9E < c1) c2++;
3454             }
3455         }else{
3456             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3457             if (0x9E < c1) c2++;
3458         }
3459         if (c1 < 0x9F)
3460             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3461         else {
3462             c1 = c1 - 0x7E;
3463         }
3464     }
3465
3466 #ifdef X0212_ENABLE
3467     c2 = x0212_unshift(c2);
3468 #endif
3469     if (p2) *p2 = c2;
3470     if (p1) *p1 = c1;
3471     return 0;
3472 }
3473
3474 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3475 {
3476     if (c2 == JIS_X_0201) {
3477         c1 &= 0x7f;
3478     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3479         /* NOP */
3480     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3481         /* CP932 UDC */
3482         if(c1 == 0x7F) return 0;
3483         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3484         c2 = 0;
3485     } else {
3486         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3487         if (ret) return ret;
3488     }
3489     (*oconv)(c2, c1);
3490     return 0;
3491 }
3492
3493 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3494 {
3495     if (c2 == JIS_X_0201) {
3496         c1 &= 0x7f;
3497 #ifdef X0212_ENABLE
3498     }else if (c2 == 0x8f){
3499         if (c0 == 0){
3500             return -1;
3501         }
3502         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3503             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3504             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3505             c2 = 0;
3506         } else {
3507             c2 = (c2 << 8) | (c1 & 0x7f);
3508             c1 = c0 & 0x7f;
3509 #ifdef SHIFTJIS_CP932
3510             if (cp51932_f){
3511                 nkf_char s2, s1;
3512                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3513                     s2e_conv(s2, s1, &c2, &c1);
3514                     if (c2 < 0x100){
3515                         c1 &= 0x7f;
3516                         c2 &= 0x7f;
3517                     }
3518                 }
3519             }
3520 #endif /* SHIFTJIS_CP932 */
3521         }
3522 #endif /* X0212_ENABLE */
3523     } else if (c2 == SSO){
3524         c2 = JIS_X_0201;
3525         c1 &= 0x7f;
3526     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3527         /* NOP */
3528     } else {
3529         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3530             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3531             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3532             c2 = 0;
3533         } else {
3534             c1 &= 0x7f;
3535             c2 &= 0x7f;
3536 #ifdef SHIFTJIS_CP932
3537             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3538                 nkf_char s2, s1;
3539                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3540                     s2e_conv(s2, s1, &c2, &c1);
3541                     if (c2 < 0x100){
3542                         c1 &= 0x7f;
3543                         c2 &= 0x7f;
3544                     }
3545                 }
3546             }
3547 #endif /* SHIFTJIS_CP932 */
3548         }
3549     }
3550     (*oconv)(c2, c1);
3551     return 0;
3552 }
3553
3554 #ifdef UTF8_INPUT_ENABLE
3555 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3556 {
3557     nkf_char ret = 0;
3558
3559     if (!c1){
3560         *p2 = 0;
3561         *p1 = c2;
3562     }else if (0xc0 <= c2 && c2 <= 0xef) {
3563         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3564 #ifdef NUMCHAR_OPTION
3565         if (ret > 0){
3566             if (p2) *p2 = 0;
3567             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3568             ret = 0;
3569         }
3570 #endif
3571     }
3572     return ret;
3573 }
3574
3575 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3576 {
3577     nkf_char ret = 0;
3578     static const char w_iconv_utf8_1st_byte[] =
3579     { /* 0xC0 - 0xFF */
3580         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3581         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3582         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3583         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3584
3585     if (c2 < 0 || 0xff < c2) {
3586     }else if (c2 == 0) { /* 0 : 1 byte*/
3587         c0 = 0;
3588     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3589         return 0;
3590     } else{
3591         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3592         case 21:
3593             if (c1 < 0x80 || 0xBF < c1) return 0;
3594             break;
3595         case 30:
3596             if (c0 == 0) return -1;
3597             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3598                 return 0;
3599             break;
3600         case 31:
3601         case 33:
3602             if (c0 == 0) return -1;
3603             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3604                 return 0;
3605             break;
3606         case 32:
3607             if (c0 == 0) return -1;
3608             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3609                 return 0;
3610             break;
3611         case 40:
3612             if (c0 == 0) return -2;
3613             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3614                 return 0;
3615             break;
3616         case 41:
3617             if (c0 == 0) return -2;
3618             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3619                 return 0;
3620             break;
3621         case 42:
3622             if (c0 == 0) return -2;
3623             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3624                 return 0;
3625             break;
3626         default:
3627             return 0;
3628             break;
3629         }
3630     }
3631     if (c2 == 0 || c2 == EOF){
3632     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3633         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3634         c2 = 0;
3635     } else {
3636         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3637     }
3638     if (ret == 0){
3639         (*oconv)(c2, c1);
3640     }
3641     return ret;
3642 }
3643 #endif
3644
3645 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3646 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3647 {
3648     val &= VALUE_MASK;
3649     if (val < 0x80){
3650         *p2 = val;
3651         *p1 = 0;
3652         *p0 = 0;
3653     }else if (val < 0x800){
3654         *p2 = 0xc0 | (val >> 6);
3655         *p1 = 0x80 | (val & 0x3f);
3656         *p0 = 0;
3657     } else if (val <= NKF_INT32_C(0xFFFF)) {
3658         *p2 = 0xe0 | (val >> 12);
3659         *p1 = 0x80 | ((val >> 6) & 0x3f);
3660         *p0 = 0x80 | (val        & 0x3f);
3661     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3662         *p2 = 0xe0 |  (val >> 16);
3663         *p1 = 0x80 | ((val >> 12) & 0x3f);
3664         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3665     } else {
3666         *p2 = 0;
3667         *p1 = 0;
3668         *p0 = 0;
3669     }
3670 }
3671 #endif
3672
3673 #ifdef UTF8_INPUT_ENABLE
3674 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3675 {
3676     nkf_char val;
3677     if (c2 >= 0xf8) {
3678         val = -1;
3679     } else if (c2 >= 0xf0){
3680         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3681         val = (c2 & 0x0f) << 18;
3682         val |= (c1 & 0x3f) << 12;
3683         val |= (c0 & 0x3f00) >> 2;
3684         val |= (c0 & 0x3f);
3685     }else if (c2 >= 0xe0){
3686         val = (c2 & 0x0f) << 12;
3687         val |= (c1 & 0x3f) << 6;
3688         val |= (c0 & 0x3f);
3689     }else if (c2 >= 0xc0){
3690         val = (c2 & 0x1f) << 6;
3691         val |= (c1 & 0x3f);
3692     }else{
3693         val = c2;
3694     }
3695     return val;
3696 }
3697
3698 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3699 {
3700     nkf_char c2, c1, c0;
3701     nkf_char ret = 0;
3702     val &= VALUE_MASK;
3703     if (val < 0x80){
3704         *p2 = 0;
3705         *p1 = val;
3706     }else{
3707         w16w_conv(val, &c2, &c1, &c0);
3708         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3709 #ifdef NUMCHAR_OPTION
3710         if (ret > 0){
3711             *p2 = 0;
3712             *p1 = CLASS_UNICODE | val;
3713             ret = 0;
3714         }
3715 #endif
3716     }
3717     return ret;
3718 }
3719 #endif
3720
3721 #ifdef UTF8_INPUT_ENABLE
3722 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3723 {
3724     nkf_char ret = 0;
3725     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3726         (*oconv)(c2, c1);
3727         return 0;
3728     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3729         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3730             return -2;
3731         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3732         c2 = 0;
3733     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3734         /*
3735            return 2;
3736         */
3737         return 1;
3738     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3739     if (ret) return ret;
3740     (*oconv)(c2, c1);
3741     return 0;
3742 }
3743
3744 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3745 {
3746     int ret = 0;
3747
3748     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3749     } else if (is_unicode_bmp(c1)) {
3750         ret = w16e_conv(c1, &c2, &c1);
3751     } else {
3752         c2 = 0;
3753         c1 =  CLASS_UNICODE | c1;
3754     }
3755     if (ret) return ret;
3756     (*oconv)(c2, c1);
3757     return 0;
3758 }
3759
3760 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3761 {
3762     const unsigned short *const *pp;
3763     const unsigned short *const *const *ppp;
3764     static const char no_best_fit_chars_table_C2[] =
3765     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3766         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3767         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3768         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3769     static const char no_best_fit_chars_table_C2_ms[] =
3770     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3771         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3772         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3773         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3774     static const char no_best_fit_chars_table_932_C2[] =
3775     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3776         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3777         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3778         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3779     static const char no_best_fit_chars_table_932_C3[] =
3780     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3781         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3782         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3783         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3784     nkf_char ret = 0;
3785
3786     if(c2 < 0x80){
3787         *p2 = 0;
3788         *p1 = c2;
3789     }else if(c2 < 0xe0){
3790         if(no_best_fit_chars_f){
3791             if(ms_ucs_map_f == UCS_MAP_CP932){
3792                 switch(c2){
3793                 case 0xC2:
3794                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3795                     break;
3796                 case 0xC3:
3797                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3798                     break;
3799                 }
3800             }else if(!cp932inv_f){
3801                 switch(c2){
3802                 case 0xC2:
3803                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3804                     break;
3805                 case 0xC3:
3806                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3807                     break;
3808                 }
3809             }else if(ms_ucs_map_f == UCS_MAP_MS){
3810                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3811             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3812                 switch(c2){
3813                 case 0xC2:
3814                     switch(c1){
3815                     case 0xA2:
3816                     case 0xA3:
3817                     case 0xA5:
3818                     case 0xA6:
3819                     case 0xAC:
3820                     case 0xAF:
3821                     case 0xB8:
3822                         return 1;
3823                     }
3824                     break;
3825                 }
3826             }
3827         }
3828         pp =
3829             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3830             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3831             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3832             utf8_to_euc_2bytes;
3833         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3834     }else if(c0 < 0xF0){
3835         if(no_best_fit_chars_f){
3836             if(ms_ucs_map_f == UCS_MAP_CP932){
3837                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3838             }else if(ms_ucs_map_f == UCS_MAP_MS){
3839                 switch(c2){
3840                 case 0xE2:
3841                     switch(c1){
3842                     case 0x80:
3843                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3844                         break;
3845                     case 0x88:
3846                         if(c0 == 0x92) return 1;
3847                         break;
3848                     }
3849                     break;
3850                 case 0xE3:
3851                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3852                     break;
3853                 }
3854             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3855                 switch(c2){
3856                 case 0xE3:
3857                     switch(c1){
3858                     case 0x82:
3859                             if(c0 == 0x94) return 1;
3860                         break;
3861                     case 0x83:
3862                             if(c0 == 0xBB) return 1;
3863                         break;
3864                     }
3865                     break;
3866                 }
3867             }else{
3868                 switch(c2){
3869                 case 0xE2:
3870                     switch(c1){
3871                     case 0x80:
3872                         if(c0 == 0x95) return 1;
3873                         break;
3874                     case 0x88:
3875                         if(c0 == 0xA5) return 1;
3876                         break;
3877                     }
3878                     break;
3879                 case 0xEF:
3880                     switch(c1){
3881                     case 0xBC:
3882                         if(c0 == 0x8D) return 1;
3883                         break;
3884                     case 0xBD:
3885                         if(c0 == 0x9E && !cp932inv_f) return 1;
3886                         break;
3887                     case 0xBF:
3888                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3889                         break;
3890                     }
3891                     break;
3892                 }
3893             }
3894         }
3895         ppp =
3896             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3897             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3898             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3899             utf8_to_euc_3bytes;
3900         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3901     }else return -1;
3902 #ifdef SHIFTJIS_CP932
3903     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3904         nkf_char s2, s1;
3905         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3906             s2e_conv(s2, s1, p2, p1);
3907         }else{
3908             ret = 1;
3909         }
3910     }
3911 #endif
3912     return ret;
3913 }
3914
3915 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3916 {
3917     nkf_char c2;
3918     const unsigned short *p;
3919     unsigned short val;
3920
3921     if (pp == 0) return 1;
3922
3923     c1 -= 0x80;
3924     if (c1 < 0 || psize <= c1) return 1;
3925     p = pp[c1];
3926     if (p == 0)  return 1;
3927
3928     c0 -= 0x80;
3929     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3930     val = p[c0];
3931     if (val == 0) return 1;
3932     if (no_cp932ext_f && (
3933         (val>>8) == 0x2D || /* NEC special characters */
3934         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3935         )) return 1;
3936
3937     c2 = val >> 8;
3938    if (val > 0x7FFF){
3939         c2 &= 0x7f;
3940         c2 |= PREFIX_EUCG3;
3941     }
3942     if (c2 == SO) c2 = JIS_X_0201;
3943     c1 = val & 0x7f;
3944     if (p2) *p2 = c2;
3945     if (p1) *p1 = c1;
3946     return 0;
3947 }
3948
3949 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3950 {
3951     int shift = 20;
3952     c &= VALUE_MASK;
3953     while(shift >= 0){
3954         if(c >= 1<<shift){
3955             while(shift >= 0){
3956                 (*f)(0, bin2hex(c>>shift));
3957                 shift -= 4;
3958             }
3959         }else{
3960             shift -= 4;
3961         }
3962     }
3963     return;
3964 }
3965
3966 void encode_fallback_html(nkf_char c)
3967 {
3968     (*oconv)(0, '&');
3969     (*oconv)(0, '#');
3970     c &= VALUE_MASK;
3971     if(c >= NKF_INT32_C(1000000))
3972         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3973     if(c >= NKF_INT32_C(100000))
3974         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3975     if(c >= 10000)
3976         (*oconv)(0, 0x30+(c/10000  )%10);
3977     if(c >= 1000)
3978         (*oconv)(0, 0x30+(c/1000   )%10);
3979     if(c >= 100)
3980         (*oconv)(0, 0x30+(c/100    )%10);
3981     if(c >= 10)
3982         (*oconv)(0, 0x30+(c/10     )%10);
3983     if(c >= 0)
3984         (*oconv)(0, 0x30+ c         %10);
3985     (*oconv)(0, ';');
3986     return;
3987 }
3988
3989 void encode_fallback_xml(nkf_char c)
3990 {
3991     (*oconv)(0, '&');
3992     (*oconv)(0, '#');
3993     (*oconv)(0, 'x');
3994     nkf_each_char_to_hex(oconv, c);
3995     (*oconv)(0, ';');
3996     return;
3997 }
3998
3999 void encode_fallback_java(nkf_char c)
4000 {
4001     (*oconv)(0, '\\');
4002     c &= VALUE_MASK;
4003     if(!is_unicode_bmp(c)){
4004         (*oconv)(0, 'U');
4005         (*oconv)(0, '0');
4006         (*oconv)(0, '0');
4007         (*oconv)(0, bin2hex(c>>20));
4008         (*oconv)(0, bin2hex(c>>16));
4009     }else{
4010         (*oconv)(0, 'u');
4011     }
4012     (*oconv)(0, bin2hex(c>>12));
4013     (*oconv)(0, bin2hex(c>> 8));
4014     (*oconv)(0, bin2hex(c>> 4));
4015     (*oconv)(0, bin2hex(c    ));
4016     return;
4017 }
4018
4019 void encode_fallback_perl(nkf_char c)
4020 {
4021     (*oconv)(0, '\\');
4022     (*oconv)(0, 'x');
4023     (*oconv)(0, '{');
4024     nkf_each_char_to_hex(oconv, c);
4025     (*oconv)(0, '}');
4026     return;
4027 }
4028
4029 void encode_fallback_subchar(nkf_char c)
4030 {
4031     c = unicode_subchar;
4032     (*oconv)((c>>8)&0xFF, c&0xFF);
4033     return;
4034 }
4035 #endif
4036
4037 #ifdef UTF8_OUTPUT_ENABLE
4038 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
4039 {
4040     const unsigned short *p;
4041
4042     if (c2 == JIS_X_0201) {
4043         if (ms_ucs_map_f == UCS_MAP_CP10001) {
4044             switch (c1) {
4045             case 0x20:
4046                 return 0xA0;
4047             case 0x7D:
4048                 return 0xA9;
4049             }
4050         }
4051         p = euc_to_utf8_1byte;
4052 #ifdef X0212_ENABLE
4053     } else if (is_eucg3(c2)){
4054         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4055             return 0xA6;
4056         }
4057         c2 = (c2&0x7f) - 0x21;
4058         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4059             p = x0212_to_utf8_2bytes[c2];
4060         else
4061             return 0;
4062 #endif
4063     } else {
4064         c2 &= 0x7f;
4065         c2 = (c2&0x7f) - 0x21;
4066         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4067             p =
4068                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4069                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4070                 euc_to_utf8_2bytes_ms[c2];
4071         else
4072             return 0;
4073     }
4074     if (!p) return 0;
4075     c1 = (c1 & 0x7f) - 0x21;
4076     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4077         return p[c1];
4078     return 0;
4079 }
4080
4081 void w_oconv(nkf_char c2, nkf_char c1)
4082 {
4083     nkf_char c0;
4084     nkf_char val;
4085
4086     if (output_bom_f) {
4087         output_bom_f = FALSE;
4088         (*o_putc)('\357');
4089         (*o_putc)('\273');
4090         (*o_putc)('\277');
4091     }
4092
4093     if (c2 == EOF) {
4094         (*o_putc)(EOF);
4095         return;
4096     }
4097
4098 #ifdef NUMCHAR_OPTION
4099     if (c2 == 0 && is_unicode_capsule(c1)){
4100         val = c1 & VALUE_MASK;
4101         if (val < 0x80){
4102             (*o_putc)(val);
4103         }else if (val < 0x800){
4104             (*o_putc)(0xC0 | (val >> 6));
4105             (*o_putc)(0x80 | (val & 0x3f));
4106         } else if (val <= NKF_INT32_C(0xFFFF)) {
4107             (*o_putc)(0xE0 | (val >> 12));
4108             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4109             (*o_putc)(0x80 | (val        & 0x3f));
4110         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4111             (*o_putc)(0xF0 | ( val>>18));
4112             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4113             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4114             (*o_putc)(0x80 | ( val      & 0x3f));
4115         }
4116         return;
4117     }
4118 #endif
4119
4120     if (c2 == 0) {
4121         output_mode = ASCII;
4122         (*o_putc)(c1);
4123     } else if (c2 == ISO_8859_1) {
4124         output_mode = UTF_8;
4125         (*o_putc)(c1 | 0x080);
4126     } else {
4127         output_mode = UTF_8;
4128         val = e2w_conv(c2, c1);
4129         if (val){
4130             w16w_conv(val, &c2, &c1, &c0);
4131             (*o_putc)(c2);
4132             if (c1){
4133                 (*o_putc)(c1);
4134                 if (c0) (*o_putc)(c0);
4135             }
4136         }
4137     }
4138 }
4139
4140 void w_oconv16(nkf_char c2, nkf_char c1)
4141 {
4142     if (output_bom_f) {
4143         output_bom_f = FALSE;
4144         if (output_endian == ENDIAN_LITTLE){
4145             (*o_putc)((unsigned char)'\377');
4146             (*o_putc)('\376');
4147         }else{
4148             (*o_putc)('\376');
4149             (*o_putc)((unsigned char)'\377');
4150         }
4151     }
4152
4153     if (c2 == EOF) {
4154         (*o_putc)(EOF);
4155         return;
4156     }
4157
4158     if (c2 == ISO_8859_1) {
4159         c2 = 0;
4160         c1 |= 0x80;
4161 #ifdef NUMCHAR_OPTION
4162     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4163         if (is_unicode_bmp(c1)) {
4164             c2 = (c1 >> 8) & 0xff;
4165             c1 &= 0xff;
4166         } else {
4167             c1 &= VALUE_MASK;
4168             if (c1 <= UNICODE_MAX) {
4169                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4170                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4171                 if (output_endian == ENDIAN_LITTLE){
4172                     (*o_putc)(c2 & 0xff);
4173                     (*o_putc)((c2 >> 8) & 0xff);
4174                     (*o_putc)(c1 & 0xff);
4175                     (*o_putc)((c1 >> 8) & 0xff);
4176                 }else{
4177                     (*o_putc)((c2 >> 8) & 0xff);
4178                     (*o_putc)(c2 & 0xff);
4179                     (*o_putc)((c1 >> 8) & 0xff);
4180                     (*o_putc)(c1 & 0xff);
4181                 }
4182             }
4183             return;
4184         }
4185 #endif
4186     } else if (c2) {
4187         nkf_char val = e2w_conv(c2, c1);
4188         c2 = (val >> 8) & 0xff;
4189         c1 = val & 0xff;
4190         if (!val) return;
4191     }
4192     if (output_endian == ENDIAN_LITTLE){
4193         (*o_putc)(c1);
4194         (*o_putc)(c2);
4195     }else{
4196         (*o_putc)(c2);
4197         (*o_putc)(c1);
4198     }
4199 }
4200
4201 void w_oconv32(nkf_char c2, nkf_char c1)
4202 {
4203     if (output_bom_f) {
4204     &n