OSDN Git Service

* refactoring around set_{in,out}_encoding.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * nkf is currently maintained on SourceForge.
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.165 2008/01/22 00:30:05 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2008-01-21"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #ifndef MIME_DECODE_DEFAULT
44 #define MIME_DECODE_DEFAULT STRICT_MIME
45 #endif
46 #ifndef X0201_DEFAULT
47 #define X0201_DEFAULT TRUE
48 #endif
49
/*
 * Newline emitters, selected at build time from DEFAULT_NEWLINE:
 *   0x0D0A -> CR then LF, 0x0D -> CR, anything else (or undefined) -> LF.
 * PUT_NEWLINE(func) calls func(byte) once per newline byte;
 * OCONV_NEWLINE(func) calls func(0, byte) once per newline byte.
 * NOTE(review): the fallback branch defines DEFAULT_NEWLINE without first
 * undefining it, so a pre-existing definition with some other value would
 * be redefined here -- confirm config.h never supplies such a value.
 */
50 #if DEFAULT_NEWLINE == 0x0D0A
51 #define PUT_NEWLINE(func) do {\
52     func(0x0D);\
53     func(0x0A);\
54 } while (0)
55 #define OCONV_NEWLINE(func) do {\
56     func(0, 0x0D);\
57     func(0, 0x0A);\
58 } while (0)
59 #elif DEFAULT_NEWLINE == 0x0D
60 #define PUT_NEWLINE(func) func(0x0D)
61 #define OCONV_NEWLINE(func) func(0, 0x0D)
62 #else
63 #define DEFAULT_NEWLINE 0x0A
64 #define PUT_NEWLINE(func) func(0x0A)
65 #define OCONV_NEWLINE(func) func(0, 0x0A)
66 #endif
67 #ifdef HELP_OUTPUT_STDERR
68 #define HELP_OUTPUT stderr
69 #else
70 #define HELP_OUTPUT stdout
71 #endif
72
73 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
74 #define MSDOS
75 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
76 #define __WIN32__
77 #endif
78 #endif
79
80 #ifdef PERL_XS
81 #undef OVERWRITE
82 #endif
83
84 #ifndef PERL_XS
85 #include <stdio.h>
86 #endif
87
88 #include <stdlib.h>
89 #include <string.h>
90
91 #if defined(MSDOS) || defined(__OS2__)
92 #include <fcntl.h>
93 #include <io.h>
94 #if defined(_MSC_VER) || defined(__WATCOMC__)
95 #define mktemp _mktemp
96 #endif
97 #endif
98
99 #ifdef MSDOS
100 #ifdef LSI_C
101 #define setbinmode(fp) fsetbin(fp)
102 #elif defined(__DJGPP__)
103 #include <libc/dosio.h>
104 #define setbinmode(fp) djgpp_setbinmode(fp)
105 #else /* Microsoft C, Turbo C */
106 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
107 #endif
108 #else /* UNIX */
109 #define setbinmode(fp)
110 #endif
111
112 #if defined(__DJGPP__)
/*
 * Put the file handle behind fp into binary mode (DJGPP builds only).
 * Reads the handle's mode word from __file_handle_modes[], clears O_TEXT,
 * sets O_BINARY and stores the result through __file_handle_set().
 * NOTE(review): the result of fileno() is used as an array index without
 * being checked -- confirm callers only pass valid, open streams.
 */
113 void  djgpp_setbinmode(FILE *fp)
114 {
115     /* Deliberately not libc's setmode(): that one also changes the COOKED/RAW mode of the device. */
116     int fd, m;
117     fd = fileno(fp);
118     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
119     __file_handle_set(fd, m);
120 }
121 #endif
122
123 #ifdef _IOFBF /* SysV and MSDOS, Windows */
124 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
125 #else /* BSD */
126 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
127 #endif
128
129 /*Borland C++ 4.5 EasyWin*/
130 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
131 #define         EASYWIN
132 #ifndef __WIN16__
133 #define __WIN16__
134 #endif
135 #include <windows.h>
136 #endif
137
138 #ifdef OVERWRITE
139 /* added by satoru@isoternet.org */
140 #if defined(__EMX__)
141 #include <sys/types.h>
142 #endif
143 #include <sys/stat.h>
144 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
145 #include <unistd.h>
146 #if defined(__WATCOMC__)
147 #include <sys/utime.h>
148 #else
149 #include <utime.h>
150 #endif
151 #else /* defined(MSDOS) */
152 #ifdef __WIN32__
153 #ifdef __BORLANDC__ /* BCC32 */
154 #include <utime.h>
155 #else /* !defined(__BORLANDC__) */
156 #include <sys/utime.h>
157 #endif /* (__BORLANDC__) */
158 #else /* !defined(__WIN32__) */
159 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
160 #include <sys/utime.h>
161 #elif defined(__TURBOC__) /* BCC */
162 #include <utime.h>
163 #elif defined(LSI_C) /* LSI C */
164 #endif /* (__WIN32__) */
165 #endif
166 #endif
167 #endif
168
169
170 #ifndef __WIN32__
171 #define HAVE_LANGINFO_H
172 #define HAVE_LOCALE_H
173 #endif
174
175 #ifdef HAVE_LANGINFO_H
176 #include <langinfo.h>
177 #endif
178 #ifdef HAVE_LOCALE_H
179 #include <locale.h>
180 #endif
181
182 #define         FALSE   0
183 #define         TRUE    1
184
185 /* state of output_mode and input_mode
186
187    c2           0 means ASCII
188                 JIS_X_0201
189                 ISO_8859_1
190                 JIS_X_0208
191                 EOF      all termination
192    c1           32bit data
193
194  */
195
196 /* MIME ENCODE */
197
198 #define         FIXED_MIME      7
199 #define         STRICT_MIME     8
200
201 /*
 * Byte order selectors. input_endian and output_endian in this file are
 * both initialised to ENDIAN_BIG.
 * NOTE(review): ENDIAN_2143 and ENDIAN_3412 look like the two mixed byte
 * orders of a four-byte unit, named after the byte positions -- confirm
 * against the UTF-32 reader.
 */
202 enum byte_order {
203     ENDIAN_BIG    = 1,
204     ENDIAN_LITTLE = 2,
205     ENDIAN_2143   = 3,
206     ENDIAN_3412   = 4
207 };
208
209 /* ASCII CODE */
210
211 #define         BS      0x08
212 #define         TAB     0x09
213 #define         LF      0x0a
214 #define         CR      0x0d
215 #define         ESC     0x1b
216 #define         SP      0x20
217 #define         AT      0x40
218 #define         SSP     0xa0
219 #define         DEL     0x7f
220 #define         SI      0x0f
221 #define         SO      0x0e
222 #define         SSO     0x8e
223 #define         SS3     0x8f
224 #define         CRLF    0x0D0A
225
226
227 /* encodings */
228
/*
 * Encoding identifiers followed by character-set identifiers.
 *
 * ASCII through UTF_32LE_BOM take the implicit values 0..32 and are listed
 * in the same order as the first 33 rows of nkf_encoding_table[];
 * NKF_ENCODING_TABLE_SIZE (33) is the count of those enumerators.
 * The JIS_X_* identifiers are given explicit values from 0x1000 upward, and
 * BINARY, having no initialiser, follows JIS_X_0213_2 and is therefore 0x1005.
 */
229 enum nkf_encodings {
230     ASCII,
231     ISO_8859_1,
232     ISO_2022_JP,
233     CP50220,
234     CP50221,
235     CP50222,
236     ISO_2022_JP_1,
237     ISO_2022_JP_3,
238     SHIFT_JIS,
239     WINDOWS_31J,
240     CP10001,
241     EUC_JP,
242     CP51932,
243     EUCJP_MS,
244     EUCJP_ASCII,
245     SHIFT_JISX0213,
246     SHIFT_JIS_2004,
247     EUC_JISX0213,
248     EUC_JIS_2004,
249     UTF_8,
250     UTF_8N,
251     UTF_8_BOM,
252     UTF8_MAC,
253     UTF_16,
254     UTF_16BE,
255     UTF_16BE_BOM,
256     UTF_16LE,
257     UTF_16LE_BOM,
258     UTF_32,
259     UTF_32BE,
260     UTF_32BE_BOM,
261     UTF_32LE,
262     UTF_32LE_BOM,
263     NKF_ENCODING_TABLE_SIZE, /* number of enumerators above, not an encoding */
264     JIS_X_0201=0x1000,       /* character-set ids start here, well above the encoding ids */
265     JIS_X_0208=0x1001,
266     JIS_X_0212=0x1002,
267     JIS_X_0213_1=0x1003,
268     JIS_X_0213_2=0x1004,
269     BINARY                   /* implicit value 0x1005 */
270 };
271
272 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
273 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
274 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
275 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
276 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
277 void j_oconv(nkf_char c2, nkf_char c1);
278 void s_oconv(nkf_char c2, nkf_char c1);
279 void e_oconv(nkf_char c2, nkf_char c1);
280 void w_oconv(nkf_char c2, nkf_char c1);
281 void w_oconv16(nkf_char c2, nkf_char c1);
282 void w_oconv32(nkf_char c2, nkf_char c1);
283
/*
 * A base encoding: a name plus the pair of converter functions that handle
 * it. iconv takes three nkf_char arguments (c2, c1, c0) and returns an
 * nkf_char; oconv takes two (c2, c1) and returns nothing.
 */
284 typedef struct {
285     const char *name;
286     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
287     void (*oconv)(nkf_char c2, nkf_char c1);
288 } nkf_native_encoding;
289
/*
 * The seven base encodings. Note that ASCII and ISO-2022-JP both use
 * e_iconv as their input converter; ASCII also shares e_oconv with EUC-JP,
 * while ISO-2022-JP writes through j_oconv.
 */
290 nkf_native_encoding NkfEncodingASCII =          { "ASCII", e_iconv, e_oconv };
291 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
292 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
293 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
294 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
295 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
296 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
297
/*
 * A named encoding: its enum nkf_encodings id, its canonical name, and the
 * base encoding whose converters are used for it.
 */
298 typedef struct {
299     const int id;
300     const char *name;
301     const nkf_native_encoding *base_encoding;
302 } nkf_encoding;
303
/*
 * One row per named encoding, terminated by the {-1, NULL, NULL} row.
 * The rows from ASCII to UTF_32LE_BOM are in enum order, so for those the
 * row index equals the id. The BINARY row is the exception: it sits at
 * index 33 while its id is 0x1005, so it cannot be reached by indexing
 * with the id.
 */
304 nkf_encoding nkf_encoding_table[] = {
305     {ASCII,             "US-ASCII",             &NkfEncodingASCII},
306     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
307     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
308     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
309     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
310     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
311     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
312     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
313     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
314     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
315     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
316     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
317     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
318     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
319     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
320     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
321     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
322     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
323     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
324     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
325     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
326     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
327     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
328     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
329     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
330     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
331     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
332     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
333     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
334     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
335     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
336     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
337     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
338     {BINARY,            "BINARY",               &NkfEncodingASCII},
339     {-1,                NULL,                   NULL}
340 };
341
/*
 * Maps encoding names and aliases to enum nkf_encodings ids; terminated by
 * the {NULL, -1} row. Several names may map to the same id (for example
 * "CP932" and "MS932" both give WINDOWS_31J).
 * Every key is spelled in upper case.
 * NOTE(review): presumably the lookup upper-cases the requested name
 * before comparing -- confirm in the function that searches this table.
 * NOTE(review): ISO_8859_1 has a row in nkf_encoding_table[] but no name
 * here -- confirm whether that omission is intentional.
 */
342 struct {
343     const char *name;
344     const int id;
345 } encoding_name_to_id_table[] = {
346     {"US-ASCII",                ASCII},
347     {"ASCII",                   ASCII},
348     {"ISO-2022-JP",             ISO_2022_JP},
349     {"ISO2022JP-CP932",         CP50220},
350     {"CP50220",                 CP50220},
351     {"CP50221",                 CP50221},
352     {"CP50222",                 CP50222},
353     {"ISO-2022-JP-1",           ISO_2022_JP_1},
354     {"ISO-2022-JP-3",           ISO_2022_JP_3},
355     {"SHIFT_JIS",               SHIFT_JIS},
356     {"SJIS",                    SHIFT_JIS},
357     {"WINDOWS-31J",             WINDOWS_31J},
358     {"CSWINDOWS31J",            WINDOWS_31J},
359     {"CP932",                   WINDOWS_31J},
360     {"MS932",                   WINDOWS_31J},
361     {"CP10001",                 CP10001},
362     {"EUCJP",                   EUC_JP},
363     {"EUC-JP",                  EUC_JP},
364     {"CP51932",                 CP51932},
365     {"EUC-JP-MS",               EUCJP_MS},
366     {"EUCJP-MS",                EUCJP_MS},
367     {"EUCJPMS",                 EUCJP_MS},
368     {"EUC-JP-ASCII",            EUCJP_ASCII},
369     {"EUCJP-ASCII",             EUCJP_ASCII},
370     {"SHIFT_JISX0213",          SHIFT_JISX0213},
371     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
372     {"EUC-JISX0213",            EUC_JISX0213},
373     {"EUC-JIS-2004",            EUC_JIS_2004},
374     {"UTF-8",                   UTF_8},
375     {"UTF-8N",                  UTF_8N},
376     {"UTF-8-BOM",               UTF_8_BOM},
377     {"UTF8-MAC",                UTF8_MAC},
378     {"UTF-8-MAC",               UTF8_MAC},
379     {"UTF-16",                  UTF_16},
380     {"UTF-16BE",                UTF_16BE},
381     {"UTF-16BE-BOM",            UTF_16BE_BOM},
382     {"UTF-16LE",                UTF_16LE},
383     {"UTF-16LE-BOM",            UTF_16LE_BOM},
384     {"UTF-32",                  UTF_32},
385     {"UTF-32BE",                UTF_32BE},
386     {"UTF-32BE-BOM",            UTF_32BE_BOM},
387     {"UTF-32LE",                UTF_32LE},
388     {"UTF-32LE-BOM",            UTF_32LE_BOM},
389     {"BINARY",                  BINARY},
390     {NULL,                      -1}
391 };
392
393 #if defined(DEFAULT_CODE_JIS)
394 #define     DEFAULT_ENCIDX ISO_2022_JP
395 #elif defined(DEFAULT_CODE_SJIS)
396 #define     DEFAULT_ENCIDX SHIFT_JIS
397 #elif defined(DEFAULT_CODE_EUC)
398 #define     DEFAULT_ENCIDX EUC_JP
399 #elif defined(DEFAULT_CODE_UTF8)
400 #define     DEFAULT_ENCIDX UTF_8
401 #endif
402
403
/*
 * Locale-independent ASCII classification and conversion macros.
 *
 * Every parameter is parenthesised in the expansion so that an expression
 * argument (e.g. bin2hex(hi | lo)) is evaluated as a unit rather than being
 * re-associated with the operators inside the macro.
 * The argument is still evaluated more than once in most of these macros,
 * so do not pass expressions with side effects (c++, getc(), ...).
 */
#define is_alnum(c) \
    (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || ('0' <= (c) && (c) <= '9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isoctal(c)  ('0' <= (c) && (c) <= '7')
#define nkf_isdigit(c)  ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c)  (SP <= (c) && (c) <= '~')
#define nkf_isgraph(c)  ('!' <= (c) && (c) <= '~')
/* Value of a hexadecimal digit; any non-hex character yields 0. */
#define hex2bin(c) (('0' <= (c) && (c) <= '9') ? ((c) - '0') : \
                    ('A' <= (c) && (c) <= 'F') ? ((c) - 'A' + 10) : \
                    ('a' <= (c) && (c) <= 'f') ? ((c) - 'a' + 10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c) & 15])
/* True when the second-lowest byte of c2 (after truncation to unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and for every character strictly between SP and DEL
 * except  ?  =  _  (  )  .  and the double quote (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

/* Byte ranges used by the CP932 tables; is_ibmext_in_sjis tests the first (0xFA..0xFC) range. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
432
433 #define         HOLD_SIZE       1024
434 #if defined(INT_IS_SHORT)
435 #define         IOBUF_SIZE      2048
436 #else
437 #define         IOBUF_SIZE      16384
438 #endif
439
440 #define         DEFAULT_J       'B'
441 #define         DEFAULT_R       'B'
442
443 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
444 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
445
446 #define         RANGE_NUM_MAX   18
447 #define         GETA1   0x22
448 #define         GETA2   0x2e
449
450
451 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
452 #define sizeof_euc_to_utf8_1byte 94
453 #define sizeof_euc_to_utf8_2bytes 94
454 #define sizeof_utf8_to_euc_C2 64
455 #define sizeof_utf8_to_euc_E5B8 64
456 #define sizeof_utf8_to_euc_2bytes 112
457 #define sizeof_utf8_to_euc_3bytes 16
458 #endif
459
460 /* MIME preprocessor */
461
462 #ifdef EASYWIN /*Easy Win */
463 extern POINT _BufferSize;
464 #endif
465
/*
 * Per-encoding record; input_code_list[] holds one of these for each
 * candidate input encoding.
 * NOTE(review): stat, score, index and buf look like the working state of
 * an input-encoding detector (state, penalty score, and bytes held so far)
 * -- confirm against the status_* and code_score functions.
 */
466 struct input_code{
467     char *name;                 /* encoding name, e.g. "EUC-JP"; NULL in the list terminator */
468     nkf_char stat;
469     nkf_char score;
470     nkf_char index;
471     nkf_char buf[3];
472     void (*status_func)(struct input_code *, nkf_char); /* may be NULL (UTF-16/UTF-32 rows) */
473     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter for this encoding */
474     int _file_stat;
475 };
476
477 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
478 static nkf_encoding *input_encoding = NULL;
479 static nkf_encoding *output_encoding = NULL;
480 static void set_output_encoding(nkf_encoding *enc);
481
482 #if !defined(PERL_XS) && !defined(WIN32DLL)
483 static  nkf_char     noconvert(FILE *f);
484 #endif
485 static  void    module_connection(void);
486 static  nkf_char     kanji_convert(FILE *f);
487 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
488 static  nkf_char     push_hold_buf(nkf_char c2);
489 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
490 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
491 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
492 /* UCS Mapping
493  * 0: Shift_JIS, eucJP-ascii
494  * 1: eucJP-ms
495  * 2: CP932, CP51932
496  * 3: CP10001
497  */
498 #define UCS_MAP_ASCII   0
499 #define UCS_MAP_MS      1
500 #define UCS_MAP_CP932   2
501 #define UCS_MAP_CP10001 3
502 static int ms_ucs_map_f = UCS_MAP_ASCII;
503 #endif
504 #ifdef UTF8_INPUT_ENABLE
505 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
506 static  int     no_cp932ext_f = FALSE;
507 /* ignore ZERO WIDTH NO-BREAK SPACE */
508 static  int     no_best_fit_chars_f = FALSE;
509 static  int     input_endian = ENDIAN_BIG;
510 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
511 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
512 static  void    encode_fallback_html(nkf_char c);
513 static  void    encode_fallback_xml(nkf_char c);
514 static  void    encode_fallback_java(nkf_char c);
515 static  void    encode_fallback_perl(nkf_char c);
516 static  void    encode_fallback_subchar(nkf_char c);
517 static  void    (*encode_fallback)(nkf_char c) = NULL;
518 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
519 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
520 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
521 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
522 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
523 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
524 static  void    w_status(struct input_code *, nkf_char);
525 #endif
526 #ifdef UTF8_OUTPUT_ENABLE
527 static  int     output_bom_f = FALSE;
528 static  int     output_endian = ENDIAN_BIG;
529 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
530 #endif
531 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
532 static  void    fold_conv(nkf_char c2,nkf_char c1);
533 static  void    nl_conv(nkf_char c2,nkf_char c1);
534 static  void    z_conv(nkf_char c2,nkf_char c1);
535 static  void    rot_conv(nkf_char c2,nkf_char c1);
536 static  void    hira_conv(nkf_char c2,nkf_char c1);
537 static  void    base64_conv(nkf_char c2,nkf_char c1);
538 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
539 static  void    no_connection(nkf_char c2,nkf_char c1);
540 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
541
542 static  void    code_score(struct input_code *ptr);
543 static  void    code_status(nkf_char c);
544
545 static  void    std_putc(nkf_char c);
546 static  nkf_char     std_getc(FILE *f);
547 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
548
549 static  nkf_char     broken_getc(FILE *f);
550 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
551
552 static  nkf_char     mime_begin(FILE *f);
553 static  nkf_char     mime_getc(FILE *f);
554 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
555
556 static  void    switch_mime_getc(void);
557 static  void    unswitch_mime_getc(void);
558 static  nkf_char     mime_begin_strict(FILE *f);
559 static  nkf_char     mime_getc_buf(FILE *f);
560 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
561 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
562
563 static  nkf_char     base64decode(nkf_char c);
564 static  void    mime_prechar(nkf_char c2, nkf_char c1);
565 static  void    mime_putc(nkf_char c);
566 static  void    open_mime(nkf_char c);
567 static  void    close_mime(void);
568 static  void    eof_mime(void);
569 static  void    mimeout_addchar(nkf_char c);
570 #ifndef PERL_XS
571 static  void    usage(void);
572 static  void    version(void);
573 static  void    show_configuration(void);
574 #endif
575 static  void    options(unsigned char *c);
576 static  void    reinit(void);
577
578 /* buffers */
579
580 #if !defined(PERL_XS) && !defined(WIN32DLL)
581 static unsigned char   stdibuf[IOBUF_SIZE];
582 static unsigned char   stdobuf[IOBUF_SIZE];
583 #endif
584 static unsigned char   hold_buf[HOLD_SIZE*2];
585 static int             hold_count = 0;
586
587 /* MIME preprocessor fifo */
588
/*
 * Ring buffer used by the MIME preprocessor.
 * Fifo(n) maps any index onto mime_buf by masking with MIME_BUF_SIZE-1,
 * which only wraps correctly while MIME_BUF_SIZE is a power of two.
 * NOTE(review): mime_top, mime_last and mime_input appear to be free-running
 * positions that are always accessed through Fifo() -- confirm in the
 * mime_* functions.
 */
589 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
590 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
591 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
592 static unsigned char           mime_buf[MIME_BUF_SIZE];
593 static unsigned int            mime_top = 0;
594 static unsigned int            mime_last = 0;  /* decoded */
595 static unsigned int            mime_input = 0; /* undecoded */
/* NOTE(review): the name suggests this holds the iconv handler while MIME decoding is active -- confirm. */
596 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
597
598 /* flags */
599 static int             unbuf_f = FALSE;
600 static int             estab_f = FALSE;
601 static int             nop_f = FALSE;
602 static int             binmode_f = TRUE;       /* binary mode */
603 static int             rot_f = FALSE;          /* rot14/43 mode */
604 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
605 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
606 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
607 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
608 static int             mimebuf_f = FALSE;      /* MIME buffered input */
609 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
610 static int             iso8859_f = FALSE;      /* ISO8859 through */
611 static int             mimeout_f = FALSE;       /* base64 mode */
612 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
613 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
614
615 #ifdef UNICODE_NORMALIZATION
616 static int nfc_f = FALSE;
617 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
618 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
619 static nkf_char nfc_getc(FILE *f);
620 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
621 #endif
622
623 #ifdef INPUT_OPTION
624 static int cap_f = FALSE;
625 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
626 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
627 static nkf_char cap_getc(FILE *f);
628 static nkf_char cap_ungetc(nkf_char c,FILE *f);
629
630 static int url_f = FALSE;
631 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
632 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
633 static nkf_char url_getc(FILE *f);
634 static nkf_char url_ungetc(nkf_char c,FILE *f);
635 #endif
636
/*
 * 32-bit constants and the "Unicode capsule" tests.
 *
 * NKF_INT32_C(n) appends an L suffix when INT_IS_SHORT is defined so the
 * constants below are long there; otherwise it expands to the plain literal.
 * A value is a Unicode capsule when its top byte (CLASS_MASK) equals
 * CLASS_UNICODE; the low 24 bits (VALUE_MASK) carry the value itself.
 * The macro parameter is parenthesised so expression arguments such as
 * is_unicode_bmp(CLASS_UNICODE | cp) are masked as a whole.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* True when the top byte of c is exactly CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the low 24 bits of c are at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
649
650 #ifdef NUMCHAR_OPTION
651 static int numchar_f = FALSE;
652 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
653 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
654 static nkf_char numchar_getc(FILE *f);
655 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
656 #endif
657
658 #ifdef CHECK_OPTION
659 static int noout_f = FALSE;
660 static void no_putc(nkf_char c);
661 static int debug_f = FALSE;
662 static void debug(const char *str);
663 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
664 #endif
665
666 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
667 #if !defined PERL_XS
668 static  void    print_guessed_code(char *filename);
669 #endif
670 static  void    set_input_codename(char *codename);
671
672 #ifdef EXEC_IO
673 static int exec_f = 0;
674 #endif
675
676 #ifdef SHIFTJIS_CP932
677 /* invert IBM extended characters to others */
678 static int cp51932_f = FALSE;
679
680 /* invert NEC-selected IBM extended characters to IBM extended characters */
681 static int cp932inv_f = TRUE;
682
683 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
684 #endif /* SHIFTJIS_CP932 */
685
686 #ifdef X0212_ENABLE
687 static int x0212_f = FALSE;
688 static nkf_char x0212_shift(nkf_char c);
689 static nkf_char x0212_unshift(nkf_char c);
690 #endif
691 static int x0213_f = FALSE;
692
693 static unsigned char prefix_table[256];
694
695 static void set_code_score(struct input_code *ptr, nkf_char score);
696 static void clr_code_score(struct input_code *ptr, nkf_char score);
697 static void status_disable(struct input_code *ptr);
698 static void status_push_ch(struct input_code *ptr, nkf_char c);
699 static void status_clear(struct input_code *ptr);
700 static void status_reset(struct input_code *ptr);
701 static void status_reinit(struct input_code *ptr);
702 static void status_check(struct input_code *ptr, nkf_char c);
703 static void e_status(struct input_code *, nkf_char);
704 static void s_status(struct input_code *, nkf_char);
705
/*
 * Candidate input encodings, each paired with its status function and its
 * input converter; terminated by an all-zero entry (name == NULL).
 * The UTF-8/16/32 rows exist only when UTF8_INPUT_ENABLE is defined, and
 * the UTF-16 and UTF-32 rows have no status function (NULL).
 * NOTE(review): code walking this list presumably has to skip NULL
 * status_func entries -- confirm in the callers.
 */
706 struct input_code input_code_list[] = {
707     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
708     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
709 #ifdef UTF8_INPUT_ENABLE
710     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
711     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
712     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
713 #endif
714     {0}
715 };
716
717 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
718 static int              base64_count = 0;
719
720 /* X0208 -> ASCII converter */
721
722 /* fold parameter */
723 static int             f_line = 0;    /* chars in line */
724 static int             f_prev = 0;
725 static int             fold_preserve_f = FALSE; /* preserve new lines */
726 static int             fold_f  = FALSE;
727 static int             fold_len  = 0;
728
729 /* options */
730 static unsigned char   kanji_intro = DEFAULT_J;
731 static unsigned char   ascii_intro = DEFAULT_R;
732
733 /* Folding */
734
735 #define FOLD_MARGIN  10
736 #define DEFAULT_FOLD 60
737
738 static int             fold_margin  = FOLD_MARGIN;
739
740 /* process default */
741 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
742 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
743
744 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
745 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
746 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
747 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
748 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
749 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
750 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
751
752 /* static redirections */
753
754 static  void   (*o_putc)(nkf_char c) = std_putc;
755
756 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
757 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
758
759 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
760 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
761
762 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
763
764 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
765 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
766
767 /* for strict mime */
768 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
769 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
770
771 /* Global states */
772 static int output_mode = ASCII,    /* output kanji mode */
773            input_mode =  ASCII,    /* input kanji mode */
774            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
775 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
776
777 /* X0201 / X0208 conversion tables */
778
779 /* X0201 kana conversion table.
 * Flat array of 65 two-byte entries (130 bytes); the final entry is
 * 0x00,0x00.
 * NOTE(review): each pair looks like the two code bytes of the JIS X 0208
 * character that replaces one JIS X 0201 kana byte -- confirm the indexing
 * in the converter that reads this table. */
780 /* 90-9F A0-DF */
781 static const unsigned char cv[]= {
782     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
783     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
784     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
785     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
786     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
787     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
788     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
789     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
790     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
791     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
792     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
793     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
794     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
795     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
796     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
797     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
798     0x00,0x00};
799
800
801 /* X0201 kana conversion table for dakuten (the voiced sound mark).
 * Same layout and length as cv[]: 65 two-byte entries, most of them
 * 0x00,0x00.
 * NOTE(review): a 0x00,0x00 entry presumably means the kana at that
 * position has no voiced form -- confirm in the converter. */
802 /* 90-9F A0-DF */
803 static const unsigned char dv[]= {
804     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
807     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
809     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
810     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
811     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
812     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
813     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
814     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
815     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
816     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820     0x00,0x00};
821
822 /* X0201 kana conversion table for handakuten (the semi-voiced sound mark).
 * Same size and pair layout as cv and dv (130 bytes, 65 byte pairs); only
 * five pairs are non-zero.
 * NOTE(review): a 0x00,0x00 pair presumably means the kana at that position
 * has no semi-voiced form -- confirm at the point of use. */
823 /* 90-9F A0-DF */
824 static const unsigned char ev[]= {
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
832     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
833     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
836     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
837     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841     0x00,0x00};
842
843
844 /* X0208 kigou (symbol) conversion table.
 * 96 single-byte entries.  The non-zero entries are ASCII codes of
 * punctuation and symbols (for example 0x2c ',' and 0x2e '.').
 * NOTE(review): 0x00 presumably marks a symbol with no ASCII counterpart;
 * the lookup code is outside this section -- confirm there. */
845 /* 0x8140 - 0x819e */
846 static const unsigned char fv[] = {
847
848     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
849     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
850     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
851     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
852     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
853     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
854     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
855     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
856     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
857     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
858     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
859     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
860 } ;
861
862
863
/* When TRUE, main() reopens stdout on an output file for each input file
 * and closes stdout before exiting. */
864 static int             file_out_f = FALSE;
865 #ifdef OVERWRITE
/* The four variables below are set by the --overwrite[=SUFFIX] and
 * --in-place[=SUFFIX] long options in options(). */
866 static int             overwrite_f = FALSE;      /* replace each input file with its converted form */
867 static int             preserve_time_f = FALSE;  /* TRUE for --overwrite, FALSE for --in-place; main() copies the original timestamps when set */
868 static int             backup_f = FALSE;         /* set when a SUFFIX was given; main() then renames the original to a backup name */
869 static char            *backup_suffix = "";      /* SUFFIX text passed to get_backup_filename() */
870 static char *get_backup_filename(const char *suffix, const char *filename);
871 #endif
872
873 static int nlmode_f = 0;   /* CR, LF, CRLF */
874 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
875 static nkf_char prev_cr = 0; /* CR or 0 */
876 #ifdef EASYWIN /*Easy Win */
877 static int             end_check;   /* target of the scanf() at the end of main() */
878 #endif /*Easy Win */
879
/* NOTE(review): buffer and index with external linkage; presumably the
 * push-back storage used by std_getc()/std_ungetc(), which are not visible
 * in this section -- confirm. */
880 #define STD_GC_BUFSIZE (256)
881 nkf_char std_gc_buf[STD_GC_BUFSIZE];
882 nkf_char std_gc_ndx;
883
/*
 * Return a freshly malloc()ed copy of the NUL-terminated string str.
 *
 * If the allocation fails, the error is reported with perror(str) and the
 * string literal "" is returned instead.  That literal is not heap memory,
 * so it must not be modified or passed to free().
 */
char* nkf_strcpy(const char *str)
{
    char *copy;

    copy = malloc(strlen(str) + 1);
    if (copy) {
        strcpy(copy, str);
        return copy;
    }

    perror(str);
    return "";
}
894
/*
 * Copy src into dest, converting every character with nkf_toupper().
 *
 * length is the size of the dest buffer in bytes, terminator included.
 * At most length - 1 characters are copied and dest is always
 * NUL-terminated, so a source string that is too long is truncated.
 * (The terminator used to be written at dest[length] for such strings,
 * one byte past a buffer of that size.)
 * When length is 0 nothing is written at all.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    size_t i;

    if (length == 0) return;    /* no room even for the terminator */

    /* i + 1 < length keeps one byte in reserve for the terminator. */
    for (i = 0; i + 1 < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
903
904 static nkf_encoding *nkf_enc_from_index(int idx)
905 {
906     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
907         return 0;
908     }
909     return &nkf_encoding_table[idx];
910 }
911
912 static int nkf_enc_find_index(const char *name)
913 {
914     int i, index = -1;
915     if (*name == 'X' && *(name+1) == '-') name += 2;
916     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
917         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
918             return encoding_name_to_id_table[i].id;
919         }
920     }
921     return index;
922 }
923
924 static nkf_encoding *nkf_enc_find(const char *name)
925 {
926     int idx = -1;
927     idx = nkf_enc_find_index(name);
928     if (idx < 0) return 0;
929     return nkf_enc_from_index(idx);
930 }
931
/*
 * Accessors and predicates for nkf_encoding pointers.
 * Every macro substitutes its argument textually, and the predicates do so
 * more than once, so the argument should have no side effects.
 */
/* Field accessors: the name, id and base_encoding members of *enc. */
932 #define nkf_enc_name(enc) (enc)->name
933 #define nkf_enc_to_index(enc) (enc)->id
934 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* The iconv / oconv members of the base encoding of *enc. */
935 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
936 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
937 #define nkf_enc_asciicompat(enc) (\
938     nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
939     nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is NkfEncodingUTF_8, _UTF_16 or _UTF_32. */
940 #define nkf_enc_unicode_p(enc) (\
941     nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
942     nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
943     nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
944 #define nkf_enc_cp5022x_p(enc) (\
945     nkf_enc_to_index(enc) == CP50220 ||\
946     nkf_enc_to_index(enc) == CP50221 ||\
947     nkf_enc_to_index(enc) == CP50222)
948
949 #ifndef DEFAULT_ENCIDX
/*
 * Return the name of the character encoding of the current locale.
 *
 *   - with HAVE_LANGINFO_H: the string returned by nl_langinfo(CODESET);
 *   - on __WIN32__: "CP" followed by the decimal value of GetACP(), built
 *     in a static buffer that is overwritten by the next call;
 *   - otherwise: NULL, because no query mechanism is available.
 *
 * The caller must not free the result.
 */
static char* nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    /*
     * sprintf() needs a writable buffer as its first argument and returns
     * a character count, not a string.  "CP" plus at most 10 decimal
     * digits plus the terminator fits in 16 bytes.
     */
    static char buf[16];
    sprintf(buf, "CP%u", (unsigned int)GetACP());
    return buf;
#else
    return NULL;
#endif
}
960
961 static nkf_encoding* nkf_locale_encoding()
962 {
963     nkf_encoding *enc = 0;
964     char *encname = nkf_locale_charmap();
965     if (encname)
966         enc = nkf_enc_find(encname);
967     if (enc < 0) enc = 0;
968     return enc;
969 }
970 #endif
971
972 static nkf_encoding* nkf_default_encoding()
973 {
974 #ifdef DEFAULT_ENCIDX
975     return nkf_enc_from_index(DEFAULT_ENCIDX);
976 #else
977     nkf_encoding *enc = nkf_locale_encoding();
978     if (enc <= 0) enc = nkf_enc_from_index(ISO_2022_JP);
979     return enc;
980 #endif
981 }
982
983 #ifdef WIN32DLL
984 #include "nkf32dll.c"
985 #elif defined(PERL_XS)
986 #else /* WIN32DLL */
/*
 * Command-line entry point.
 *
 * Every leading argument that begins with '-' is handed to options().
 * If no arguments remain afterwards, stdin is converted to stdout;
 * otherwise each remaining argument is opened as an input file and
 * converted in turn (kanji_convert(), or noconvert() when nop_f is set).
 * When file_out_f is set, stdout is redirected to an output file first;
 * with overwrite_f the output goes to a temporary file that then replaces
 * the input file.
 *
 * Returns 0 on success.  Returns -1 when an input file could not be
 * opened (after the other files have been processed) or as soon as an
 * output file or stream cannot be set up.
 */
987 int main(int argc, char **argv)
988 {
989     FILE  *fin;
990     unsigned char  *cp;
991
992     char *outfname = NULL;
993     char *origfname;
994
995 #ifdef EASYWIN /*Easy Win */
996     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
997 #endif
998     setlocale(LC_CTYPE, "");
999
    /* Option parsing: consume arguments while they start with '-'. */
1000     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
1001         cp = (unsigned char *)*argv;
1002         options(cp);
1003 #ifdef EXEC_IO
        /*
         * exec_f is set by --exec-in (1) / --exec-out (-1).  The rest of
         * the command line (argv[1]...) is run as a child process that is
         * connected to this process through a pipe:
         *   exec_f > 0: child's stdout -> pipe -> our stdin
         *   exec_f < 0: our stdout -> pipe -> child's stdin
         * No file arguments are processed afterwards (argc = 0).
         */
1004         if (exec_f){
1005             int fds[2], pid;
1006             if (pipe(fds) < 0 || (pid = fork()) < 0){
1007                 abort();
1008             }
1009             if (pid == 0){
1010                 if (exec_f > 0){
1011                     close(fds[0]);
1012                     dup2(fds[1], 1);
1013                 }else{
1014                     close(fds[1]);
1015                     dup2(fds[0], 0);
1016                 }
                /* NOTE(review): if execvp() fails, the child falls through
                 * and continues executing the parent's code below. */
1017                 execvp(argv[1], &argv[1]);
1018             }
1019             if (exec_f > 0){
1020                 close(fds[1]);
1021                 dup2(fds[0], 0);
1022             }else{
1023                 close(fds[0]);
1024                 dup2(fds[1], 1);
1025             }
1026             argc = 0;
1027             break;
1028         }
1029 #endif
1030     }
1031
    /*
     * Guess mode: call reinit() and then put back the flags saved here
     * (guess_f, debug_f, exec_f, x0212_f, x0213_f); mime_f is forced off.
     * NOTE(review): reinit() is defined elsewhere; presumably it resets all
     * option state to its defaults -- confirm.
     */
1032     if (guess_f) {
1033 #ifdef CHECK_OPTION
1034         int debug_f_back = debug_f;
1035 #endif
1036 #ifdef EXEC_IO
1037         int exec_f_back = exec_f;
1038 #endif
1039 #ifdef X0212_ENABLE
1040         int x0212_f_back = x0212_f;
1041 #endif
1042         int x0213_f_back = x0213_f;
1043         int guess_f_back = guess_f;
1044         reinit();
1045         guess_f = guess_f_back;
1046         mime_f = FALSE;
1047 #ifdef CHECK_OPTION
1048         debug_f = debug_f_back;
1049 #endif
1050 #ifdef EXEC_IO
1051         exec_f = exec_f_back;
1052 #endif
1053 #ifdef X0212_ENABLE
1054         x0212_f = x0212_f_back;
1055 #endif
1056         x0213_f = x0213_f_back;
1057     }
1058
    /* Put stdout into binary mode if requested.  The statement controlled
     * by this "if" is whichever branch of the #if below is compiled. */
1059     if (binmode_f == TRUE)
1060 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1061     if (freopen("","wb",stdout) == NULL)
1062         return (-1);
1063 #else
1064     setbinmode(stdout);
1065 #endif
1066
    /* Output buffering: none when unbuf_f is set, otherwise stdobuf. */
1067     if (unbuf_f)
1068       setbuf(stdout, (char *) NULL);
1069     else
1070       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1071
    /* No file arguments: filter stdin to stdout. */
1072     if (argc == 0) {
1073       if (binmode_f == TRUE)
1074 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1075       if (freopen("","rb",stdin) == NULL) return (-1);
1076 #else
1077       setbinmode(stdin);
1078 #endif
1079       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1080       if (nop_f)
1081           noconvert(stdin);
1082       else {
1083           kanji_convert(stdin);
1084           if (guess_f) print_guessed_code(NULL);
1085       }
1086     } else {
      /* One or more file arguments: process each in turn. */
1087       int nfiles = argc;
1088         int is_argument_error = FALSE;
1089       while (argc--) {
            /* Reset the per-file detection state. */
1090             input_codename = NULL;
1091             input_newline = 0;
1092 #ifdef CHECK_OPTION
1093             iconv_for_check = 0;
1094 #endif
          /* An unreadable file is reported and skipped; the failure is
           * remembered and turned into a -1 exit status after the loop. */
1095           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1096                 perror(*(argv-1));
1097                 is_argument_error = TRUE;
1098                 continue;
1099           } else {
1100 #ifdef OVERWRITE
1101               int fd = 0;
1102               int fd_backup = 0;
1103 #endif
1104
1105 /* reopen file for stdout */
1106               if (file_out_f == TRUE) {
1107 #ifdef OVERWRITE
                  /*
                   * Overwrite mode: write to a temporary file next to the
                   * input and swap it in once conversion has finished.
                   */
1108                   if (overwrite_f){
1109                       outfname = malloc(strlen(origfname)
1110                                         + strlen(".nkftmpXXXXXX")
1111                                         + 1);
1112                       if (!outfname){
1113                           perror(origfname);
1114                           return -1;
1115                       }
1116                       strcpy(outfname, origfname);
1117 #ifdef MSDOS
                      /* MSDOS: keep only the directory part of the input
                       * path and append the template "ntXXXXXX". */
1118                       {
1119                           int i;
1120                           for (i = strlen(outfname); i; --i){
1121                               if (outfname[i - 1] == '/'
1122                                   || outfname[i - 1] == '\\'){
1123                                   break;
1124                               }
1125                           }
1126                           outfname[i] = '\0';
1127                       }
1128                       strcat(outfname, "ntXXXXXX");
1129                       mktemp(outfname);
1130                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1131                                 S_IREAD | S_IWRITE);
1132 #else
                      /* Elsewhere: "<input>.nkftmpXXXXXX" via mkstemp(). */
1133                       strcat(outfname, ".nkftmpXXXXXX");
1134                       fd = mkstemp(outfname);
1135 #endif
                      /* Save the current stdout descriptor in fd_backup and
                       * point stdout's descriptor at the temporary file. */
1136                       if (fd < 0
1137                           || (fd_backup = dup(fileno(stdout))) < 0
1138                           || dup2(fd, fileno(stdout)) < 0
1139                           ){
1140                           perror(origfname);
1141                           return -1;
1142                       }
1143                   }else
1144 #endif
                  /* Not overwriting: if exactly one argument is left after
                   * this input file it names the output file (and is
                   * consumed here); otherwise output goes to "nkf.out". */
1145                   if(argc == 1) {
1146                       outfname = *argv++;
1147                       argc--;
1148                   } else {
1149                       outfname = "nkf.out";
1150                   }
1151
                  /* Runs in both modes: in overwrite mode outfname is the
                   * temporary file prepared above. */
1152                   if(freopen(outfname, "w", stdout) == NULL) {
1153                       perror (outfname);
1154                       return (-1);
1155                   }
1156                   if (binmode_f == TRUE) {
1157 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1158                       if (freopen("","wb",stdout) == NULL)
1159                            return (-1);
1160 #else
1161                       setbinmode(stdout);
1162 #endif
1163                   }
1164               }
              /* Input side: binary mode if requested, then buffering. */
1165               if (binmode_f == TRUE)
1166 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1167                  if (freopen("","rb",fin) == NULL)
1168                     return (-1);
1169 #else
1170                  setbinmode(fin);
1171 #endif
1172               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1173               if (nop_f)
1174                   noconvert(fin);
1175               else {
                  /* The file name is passed to print_guessed_code() only
                   * when more than one file was given. */
1176                   char *filename = NULL;
1177                   kanji_convert(fin);
1178                   if (nfiles > 1) filename = origfname;
1179                   if (guess_f) print_guessed_code(filename);
1180               }
1181               fclose(fin);
1182 #ifdef OVERWRITE
              /*
               * Overwrite mode, after conversion: restore stdout, copy the
               * original file's mode (and optionally timestamps) onto the
               * temporary file, optionally keep the original as a backup,
               * then rename the temporary file over the original.
               */
1183               if (overwrite_f) {
1184                   struct stat     sb;
1185 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1186                   time_t tb[2];
1187 #else
1188                   struct utimbuf  tb;
1189 #endif
1190
1191                   fflush(stdout);
1192                   close(fd);
                  /* NOTE(review): fd_backup is not closed after stdout has
                   * been restored from it, so one descriptor appears to be
                   * left open per processed file. */
1193                   if (dup2(fd_backup, fileno(stdout)) < 0){
1194                       perror("dup2");
1195                   }
                  /* NOTE(review): when stat() fails only a message is
                   * printed; sb is still read by chmod()/utime() below. */
1196                   if (stat(origfname, &sb)) {
1197                       fprintf(stderr, "Can't stat %s\n", origfname);
1198                   }
1199                   /* Restore the original file's permissions. */
1200                   if (chmod(outfname, sb.st_mode)) {
1201                       fprintf(stderr, "Can't set permission %s\n", outfname);
1202                   }
1203
1204                   /* Restore the original file's timestamps. */
1205                     if(preserve_time_f){
1206 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1207                         tb[0] = tb[1] = sb.st_mtime;
1208                         if (utime(outfname, tb)) {
1209                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1210                         }
1211 #else
1212                         tb.actime  = sb.st_atime;
1213                         tb.modtime = sb.st_mtime;
1214                         if (utime(outfname, &tb)) {
1215                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1216                         }
1217 #endif
1218                     }
1219                     if(backup_f){
                        /* NOTE(review): get_backup_filename() can return
                         * NULL; the result is neither checked nor freed
                         * here. */
1220                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1221 #ifdef MSDOS
1222                         unlink(backup_filename);
1223 #endif
1224                         if (rename(origfname, backup_filename)) {
1225                             perror(backup_filename);
1226                             fprintf(stderr, "Can't rename %s to %s\n",
1227                                     origfname, backup_filename);
1228                         }
1229                     }else{
1230 #ifdef MSDOS
                        /* MSDOS only: remove the original before the
                         * rename below. */
1231                         if (unlink(origfname)){
1232                             perror(origfname);
1233                         }
1234 #endif
1235                     }
1236                   if (rename(outfname, origfname)) {
1237                       perror(origfname);
1238                       fprintf(stderr, "Can't rename %s to %s\n",
1239                               outfname, origfname);
1240                   }
1241                   free(outfname);
1242               }
1243 #endif
1244           }
1245       }
1246         if (is_argument_error)
1247             return(-1);
1248     }
1249 #ifdef EASYWIN /*Easy Win */
    /* EASYWIN: when writing to the window, block on scanf() before exit. */
1250     if (file_out_f == FALSE)
1251         scanf("%d",&end_check);
1252     else
1253         fclose(stdout);
1254 #else /* for Other OS */
1255     if (file_out_f == TRUE)
1256         fclose(stdout);
1257 #endif /*Easy Win */
1258     return (0);
1259 }
1260 #endif /* WIN32DLL */
1261
1262 #ifdef OVERWRITE
/*
 * Build the name of the backup file for filename from suffix.
 *
 * If suffix contains one or more '*' characters, every '*' is replaced by
 * filename and all other characters are copied unchanged (suffix "*.orig"
 * and filename "a.c" give "a.c.orig").  If suffix contains no '*', the
 * result is filename followed by suffix.
 *
 * Returns a malloc()ed string that the caller owns, or NULL after a
 * perror() report when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t size;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each '*' contributes filename_length characters instead of one.
         * Subtracting the asterisks from suffix_length first cannot
         * underflow, even when filename is empty. */
        size = (suffix_length - asterisk_count)
            + asterisk_count * filename_length + 1;
    } else {
        size = filename_length + suffix_length + 1;
    }

    backup_filename = malloc(size);
    if (!backup_filename){
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; suffix[i]; i++) {
            if (suffix[i] == '*') {
                strcpy(backup_filename + j, filename);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        strcpy(backup_filename, filename);
        strcpy(backup_filename + filename_length, suffix);
    }
    return backup_filename;
}
1301 #endif
1302
/*
 * Table of long ("--name") options.  options() scans it in order and stops
 * at the first entry whose name matches the argument.
 *
 * A name that ends in '=' takes a value: options() compares only the part
 * before the '=' and treats the text after it as the value.
 * If alias is non-empty, options() processes that string of short option
 * letters in place of the long option.  If alias is "", options() handles
 * the entry itself by comparing the name with strcmp().
 */
1303 static const struct {
1304     const char *name;    /* option text that follows "--" */
1305     const char *alias;   /* equivalent short-option letters, or "" */
1306 } long_option[] = {
1307     {"ic=", ""},
1308     {"oc=", ""},
1309     {"base64","jMB"},
1310     {"euc","e"},
1311     {"euc-input","E"},
1312     {"fj","jm"},
1313     {"help","v"},
1314     {"jis","j"},
1315     {"jis-input","J"},
1316     {"mac","sLm"},
1317     {"mime","jM"},
1318     {"mime-input","m"},
1319     {"msdos","sLw"},
1320     {"sjis","s"},
1321     {"sjis-input","S"},
1322     {"unix","eLu"},
1323     {"version","V"},
1324     {"windows","sLw"},
1325     {"hiragana","h1"},
1326     {"katakana","h2"},
1327     {"katakana-hiragana","h3"},
1328     {"guess=", ""},
1329     {"guess", "g2"},
1330     {"cp932", ""},
1331     {"no-cp932", ""},
1332 #ifdef X0212_ENABLE
1333     {"x0212", ""},
1334 #endif
1335 #ifdef UTF8_OUTPUT_ENABLE
1336     {"utf8", "w"},
1337     {"utf16", "w16"},
1338     {"ms-ucs-map", ""},
1339     {"fb-skip", ""},
1340     {"fb-html", ""},
1341     {"fb-xml", ""},
1342     {"fb-perl", ""},
1343     {"fb-java", ""},
1344     {"fb-subchar", ""},
1345     {"fb-subchar=", ""},
1346 #endif
1347 #ifdef UTF8_INPUT_ENABLE
1348     {"utf8-input", "W"},
1349     {"utf16-input", "W16"},
1350     {"no-cp932ext", ""},
1351     {"no-best-fit-chars",""},
1352 #endif
1353 #ifdef UNICODE_NORMALIZATION
1354     {"utf8mac-input", ""},
1355 #endif
1356 #ifdef OVERWRITE
1357     {"overwrite", ""},
1358     {"overwrite=", ""},
1359     {"in-place", ""},
1360     {"in-place=", ""},
1361 #endif
1362 #ifdef INPUT_OPTION
1363     {"cap-input", ""},
1364     {"url-input", ""},
1365 #endif
1366 #ifdef NUMCHAR_OPTION
1367     {"numchar-input", ""},
1368 #endif
1369 #ifdef CHECK_OPTION
1370     {"no-output", ""},
1371     {"debug", ""},
1372 #endif
1373 #ifdef SHIFTJIS_CP932
1374     {"cp932inv", ""},
1375 #endif
1376 #ifdef EXEC_IO
1377     {"exec-in", ""},
1378     {"exec-out", ""},
1379 #endif
1380     {"prefix=", ""},
1381 };
1382
1383 static void set_input_encoding(nkf_encoding *enc)
1384 {
1385     switch (nkf_enc_to_index(enc)) {
1386     case CP50220:
1387     case CP50221:
1388     case CP50222:
1389 #ifdef SHIFTJIS_CP932
1390         cp51932_f = TRUE;
1391 #endif
1392 #ifdef UTF8_OUTPUT_ENABLE
1393         ms_ucs_map_f = UCS_MAP_CP932;
1394 #endif
1395         break;
1396     case ISO_2022_JP_1:
1397 #ifdef X0212_ENABLE
1398         x0212_f = TRUE;
1399 #endif
1400         break;
1401     case ISO_2022_JP_3:
1402 #ifdef X0212_ENABLE
1403         x0212_f = TRUE;
1404 #endif
1405         x0213_f = TRUE;
1406         break;
1407     case SHIFT_JIS:
1408         break;
1409     case WINDOWS_31J:
1410 #ifdef SHIFTJIS_CP932
1411         cp51932_f = TRUE;
1412 #endif
1413 #ifdef UTF8_OUTPUT_ENABLE
1414         ms_ucs_map_f = UCS_MAP_CP932;
1415 #endif
1416         break;
1417     case EUC_JP:
1418         break;
1419     case CP10001:
1420 #ifdef SHIFTJIS_CP932
1421         cp51932_f = TRUE;
1422 #endif
1423 #ifdef UTF8_OUTPUT_ENABLE
1424         ms_ucs_map_f = UCS_MAP_CP10001;
1425 #endif
1426         break;
1427     case CP51932:
1428 #ifdef SHIFTJIS_CP932
1429         cp51932_f = TRUE;
1430 #endif
1431 #ifdef UTF8_OUTPUT_ENABLE
1432         ms_ucs_map_f = UCS_MAP_CP932;
1433 #endif
1434         break;
1435     case EUCJP_MS:
1436 #ifdef SHIFTJIS_CP932
1437         cp51932_f = FALSE;
1438 #endif
1439 #ifdef UTF8_OUTPUT_ENABLE
1440         ms_ucs_map_f = UCS_MAP_MS;
1441 #endif
1442         break;
1443     case EUCJP_ASCII:
1444 #ifdef SHIFTJIS_CP932
1445         cp51932_f = FALSE;
1446 #endif
1447 #ifdef UTF8_OUTPUT_ENABLE
1448         ms_ucs_map_f = UCS_MAP_ASCII;
1449 #endif
1450         break;
1451     case SHIFT_JISX0213:
1452     case SHIFT_JIS_2004:
1453         x0213_f = TRUE;
1454 #ifdef SHIFTJIS_CP932
1455         cp51932_f = FALSE;
1456 #endif
1457         break;
1458     case EUC_JISX0213:
1459     case EUC_JIS_2004:
1460         x0213_f = TRUE;
1461 #ifdef SHIFTJIS_CP932
1462         cp51932_f = FALSE;
1463 #endif
1464         break;
1465 #ifdef UTF8_INPUT_ENABLE
1466 #ifdef UNICODE_NORMALIZATION
1467     case UTF8_MAC:
1468         nfc_f = TRUE;
1469         break;
1470 #endif
1471     case UTF_16:
1472     case UTF_16BE:
1473     case UTF_16BE_BOM:
1474         input_endian = ENDIAN_BIG;
1475         break;
1476     case UTF_16LE:
1477     case UTF_16LE_BOM:
1478         input_endian = ENDIAN_LITTLE;
1479         break;
1480     case UTF_32:
1481     case UTF_32BE:
1482     case UTF_32BE_BOM:
1483         input_endian = ENDIAN_BIG;
1484         break;
1485     case UTF_32LE:
1486     case UTF_32LE_BOM:
1487         input_endian = ENDIAN_LITTLE;
1488         break;
1489 #endif
1490     }
1491 }
1492
1493 static void set_output_encoding(nkf_encoding *enc)
1494 {
1495     switch (nkf_enc_to_index(enc)) {
1496     case CP50220:
1497         x0201_f = TRUE;
1498 #ifdef SHIFTJIS_CP932
1499         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1500 #endif
1501 #ifdef UTF8_OUTPUT_ENABLE
1502         ms_ucs_map_f = UCS_MAP_CP932;
1503 #endif
1504         break;
1505     case CP50221:
1506 #ifdef SHIFTJIS_CP932
1507         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1508 #endif
1509 #ifdef UTF8_OUTPUT_ENABLE
1510         ms_ucs_map_f = UCS_MAP_CP932;
1511 #endif
1512         break;
1513     case ISO_2022_JP_1:
1514 #ifdef X0212_ENABLE
1515         x0212_f = TRUE;
1516 #endif
1517 #ifdef SHIFTJIS_CP932
1518         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1519 #endif
1520         break;
1521     case ISO_2022_JP_3:
1522 #ifdef X0212_ENABLE
1523         x0212_f = TRUE;
1524 #endif
1525         x0213_f = TRUE;
1526 #ifdef SHIFTJIS_CP932
1527         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1528 #endif
1529         break;
1530     case SHIFT_JIS:
1531         break;
1532     case WINDOWS_31J:
1533 #ifdef UTF8_OUTPUT_ENABLE
1534         ms_ucs_map_f = UCS_MAP_CP932;
1535 #endif
1536         break;
1537     case CP10001:
1538 #ifdef UTF8_OUTPUT_ENABLE
1539         ms_ucs_map_f = UCS_MAP_CP10001;
1540 #endif
1541         break;
1542     case EUC_JP:
1543         x0212_f = TRUE;
1544 #ifdef SHIFTJIS_CP932
1545         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1546 #endif
1547 #ifdef UTF8_OUTPUT_ENABLE
1548         ms_ucs_map_f = UCS_MAP_CP932;
1549 #endif
1550         break;
1551     case CP51932:
1552 #ifdef SHIFTJIS_CP932
1553         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1554 #endif
1555 #ifdef UTF8_OUTPUT_ENABLE
1556         ms_ucs_map_f = UCS_MAP_CP932;
1557 #endif
1558         break;
1559     case EUCJP_MS:
1560 #ifdef X0212_ENABLE
1561         x0212_f = TRUE;
1562 #endif
1563 #ifdef UTF8_OUTPUT_ENABLE
1564         ms_ucs_map_f = UCS_MAP_MS;
1565 #endif
1566         break;
1567     case EUCJP_ASCII:
1568 #ifdef X0212_ENABLE
1569         x0212_f = TRUE;
1570 #endif
1571 #ifdef UTF8_OUTPUT_ENABLE
1572         ms_ucs_map_f = UCS_MAP_ASCII;
1573 #endif
1574         break;
1575     case SHIFT_JISX0213:
1576     case SHIFT_JIS_2004:
1577         x0213_f = TRUE;
1578 #ifdef SHIFTJIS_CP932
1579         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1580 #endif
1581         break;
1582     case EUC_JISX0213:
1583     case EUC_JIS_2004:
1584 #ifdef X0212_ENABLE
1585         x0212_f = TRUE;
1586 #endif
1587         x0213_f = TRUE;
1588 #ifdef SHIFTJIS_CP932
1589         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1590 #endif
1591         break;
1592 #ifdef UTF8_OUTPUT_ENABLE
1593     case UTF_8_BOM:
1594         output_bom_f = TRUE;
1595         break;
1596     case UTF_16:
1597     case UTF_16BE_BOM:
1598         output_bom_f = TRUE;
1599         break;
1600     case UTF_16LE:
1601         output_endian = ENDIAN_LITTLE;
1602         output_bom_f = FALSE;
1603         break;
1604     case UTF_16LE_BOM:
1605         output_endian = ENDIAN_LITTLE;
1606         output_bom_f = TRUE;
1607         break;
1608     case UTF_32BE_BOM:
1609         output_bom_f = TRUE;
1610         break;
1611     case UTF_32LE:
1612         output_endian = ENDIAN_LITTLE;
1613         output_bom_f = FALSE;
1614         break;
1615     case UTF_32LE_BOM:
1616         output_endian = ENDIAN_LITTLE;
1617         output_bom_f = TRUE;
1618         break;
1619 #endif
1620     }
1621 }
1622
/* Set to 1 by options() when it meets a bare "--"; once set, options()
 * returns immediately without looking at its argument. */
1623 static int option_mode = 0;
1624
1625 void options(unsigned char *cp)
1626 {
1627     nkf_char i, j;
1628     unsigned char *p;
1629     unsigned char *cp_back = NULL;
1630     char codeset[32];
1631     nkf_encoding *enc;
1632
1633     if (option_mode==1)
1634         return;
1635     while(*cp && *cp++!='-');
1636     while (*cp || cp_back) {
1637         if(!*cp){
1638             cp = cp_back;
1639             cp_back = NULL;
1640             continue;
1641         }
1642         p = 0;
1643         switch (*cp++) {
1644         case '-':  /* literal options */
1645             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1646                 option_mode = 1;
1647                 return;
1648             }
1649             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1650                 p = (unsigned char *)long_option[i].name;
1651                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1652                 if (*p == cp[j] || cp[j] == SP){
1653                     p = &cp[j] + 1;
1654                     break;
1655                 }
1656                 p = 0;
1657             }
1658             if (p == 0) {
1659                 fprintf(stderr, "unknown long option: --%s\n", cp);
1660                 return;
1661             }
1662             while(*cp && *cp != SP && cp++);
1663             if (long_option[i].alias[0]){
1664                 cp_back = cp;
1665                 cp = (unsigned char *)long_option[i].alias;
1666             }else{
1667                 if (strcmp(long_option[i].name, "ic=") == 0){
1668                     nkf_str_upcase((char *)p, codeset, 32);
1669                     enc = nkf_enc_find(codeset);
1670                     if (!enc) continue;
1671                     input_encoding = enc;
1672                     continue;
1673                 }
1674                 if (strcmp(long_option[i].name, "oc=") == 0){
1675                     nkf_str_upcase((char *)p, codeset, 32);
1676                     enc = nkf_enc_find(codeset);
1677                     if (enc <= 0) continue;
1678                     output_encoding = enc;
1679                     continue;
1680                 }
1681                 if (strcmp(long_option[i].name, "guess=") == 0){
1682                     if (p[0] == '0' || p[0] == '1') {
1683                         guess_f = 1;
1684                     } else {
1685                         guess_f = 2;
1686                     }
1687                     continue;
1688                 }
1689 #ifdef OVERWRITE
1690                 if (strcmp(long_option[i].name, "overwrite") == 0){
1691                     file_out_f = TRUE;
1692                     overwrite_f = TRUE;
1693                     preserve_time_f = TRUE;
1694                     continue;
1695                 }
1696                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1697                     file_out_f = TRUE;
1698                     overwrite_f = TRUE;
1699                     preserve_time_f = TRUE;
1700                     backup_f = TRUE;
1701                     backup_suffix = malloc(strlen((char *) p) + 1);
1702                     strcpy(backup_suffix, (char *) p);
1703                     continue;
1704                 }
1705                 if (strcmp(long_option[i].name, "in-place") == 0){
1706                     file_out_f = TRUE;
1707                     overwrite_f = TRUE;
1708                     preserve_time_f = FALSE;
1709                     continue;
1710                 }
1711                 if (strcmp(long_option[i].name, "in-place=") == 0){
1712                     file_out_f = TRUE;
1713                     overwrite_f = TRUE;
1714                     preserve_time_f = FALSE;
1715                     backup_f = TRUE;
1716                     backup_suffix = malloc(strlen((char *) p) + 1);
1717                     strcpy(backup_suffix, (char *) p);
1718                     continue;
1719                 }
1720 #endif
1721 #ifdef INPUT_OPTION
1722                 if (strcmp(long_option[i].name, "cap-input") == 0){
1723                     cap_f = TRUE;
1724                     continue;
1725                 }
1726                 if (strcmp(long_option[i].name, "url-input") == 0){
1727                     url_f = TRUE;
1728                     continue;
1729                 }
1730 #endif
1731 #ifdef NUMCHAR_OPTION
1732                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1733                     numchar_f = TRUE;
1734                     continue;
1735                 }
1736 #endif
1737 #ifdef CHECK_OPTION
1738                 if (strcmp(long_option[i].name, "no-output") == 0){
1739                     noout_f = TRUE;
1740                     continue;
1741                 }
1742                 if (strcmp(long_option[i].name, "debug") == 0){
1743                     debug_f = TRUE;
1744                     continue;
1745                 }
1746 #endif
1747                 if (strcmp(long_option[i].name, "cp932") == 0){
1748 #ifdef SHIFTJIS_CP932
1749                     cp51932_f = TRUE;
1750                     cp932inv_f = -TRUE;
1751 #endif
1752 #ifdef UTF8_OUTPUT_ENABLE
1753                     ms_ucs_map_f = UCS_MAP_CP932;
1754 #endif
1755                     continue;
1756                 }
1757                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1758 #ifdef SHIFTJIS_CP932
1759                     cp51932_f = FALSE;
1760                     cp932inv_f = FALSE;
1761 #endif
1762 #ifdef UTF8_OUTPUT_ENABLE
1763                     ms_ucs_map_f = UCS_MAP_ASCII;
1764 #endif
1765                     continue;
1766                 }
1767 #ifdef SHIFTJIS_CP932
1768                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1769                     cp932inv_f = -TRUE;
1770                     continue;
1771                 }
1772 #endif
1773
1774 #ifdef X0212_ENABLE
1775                 if (strcmp(long_option[i].name, "x0212") == 0){
1776                     x0212_f = TRUE;
1777                     continue;
1778                 }
1779 #endif
1780
1781 #ifdef EXEC_IO
1782                   if (strcmp(long_option[i].name, "exec-in") == 0){
1783                       exec_f = 1;
1784                       return;
1785                   }
1786                   if (strcmp(long_option[i].name, "exec-out") == 0){
1787                       exec_f = -1;
1788                       return;
1789                   }
1790 #endif
1791 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1792                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1793                     no_cp932ext_f = TRUE;
1794                     continue;
1795                 }
1796                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1797                     no_best_fit_chars_f = TRUE;
1798                     continue;
1799                 }
1800                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1801                     encode_fallback = NULL;
1802                     continue;
1803                 }
1804                 if (strcmp(long_option[i].name, "fb-html") == 0){
1805                     encode_fallback = encode_fallback_html;
1806                     continue;
1807                 }
1808                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1809                     encode_fallback = encode_fallback_xml;
1810                     continue;
1811                 }
1812                 if (strcmp(long_option[i].name, "fb-java") == 0){
1813                     encode_fallback = encode_fallback_java;
1814                     continue;
1815                 }
1816                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1817                     encode_fallback = encode_fallback_perl;
1818                     continue;
1819                 }
1820                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1821                     encode_fallback = encode_fallback_subchar;
1822                     continue;
1823                 }
1824                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1825                     encode_fallback = encode_fallback_subchar;
1826                     unicode_subchar = 0;
1827                     if (p[0] != '0'){
1828                         /* decimal number */
1829                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1830                             unicode_subchar *= 10;
1831                             unicode_subchar += hex2bin(p[i]);
1832                         }
1833                     }else if(p[1] == 'x' || p[1] == 'X'){
1834                         /* hexadecimal number */
1835                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1836                             unicode_subchar <<= 4;
1837                             unicode_subchar |= hex2bin(p[i]);
1838                         }
1839                     }else{
1840                         /* octal number */
1841                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1842                             unicode_subchar *= 8;
1843                             unicode_subchar += hex2bin(p[i]);
1844                         }
1845                     }
1846                     w16e_conv(unicode_subchar, &i, &j);
1847                     unicode_subchar = i<<8 | j;
1848                     continue;
1849                 }
1850 #endif
1851 #ifdef UTF8_OUTPUT_ENABLE
1852                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1853                     ms_ucs_map_f = UCS_MAP_MS;
1854                     continue;
1855                 }
1856 #endif
1857 #ifdef UNICODE_NORMALIZATION
1858                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1859                     nfc_f = TRUE;
1860                     continue;
1861                 }
1862 #endif
1863                 if (strcmp(long_option[i].name, "prefix=") == 0){
1864                     if (nkf_isgraph(p[0])){
1865                         for (i = 1; nkf_isgraph(p[i]); i++){
1866                             prefix_table[p[i]] = p[0];
1867                         }
1868                     }
1869                     continue;
1870                 }
1871             }
1872             continue;
1873         case 'b':           /* buffered mode */
1874             unbuf_f = FALSE;
1875             continue;
1876         case 'u':           /* non bufferd mode */
1877             unbuf_f = TRUE;
1878             continue;
1879         case 't':           /* transparent mode */
1880             if (*cp=='1') {
1881                 /* alias of -t */
1882                 cp++;
1883                 nop_f = TRUE;
1884             } else if (*cp=='2') {
1885                 /*
1886                  * -t with put/get
1887                  *
1888                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1889                  *
1890                  */
1891                 cp++;
1892                 nop_f = 2;
1893             } else
1894                 nop_f = TRUE;
1895             continue;
1896         case 'j':           /* JIS output */
1897         case 'n':
1898             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1899             continue;
1900         case 'e':           /* AT&T EUC output */
1901             output_encoding = nkf_enc_from_index(EUC_JP);
1902             continue;
1903         case 's':           /* SJIS output */
1904             output_encoding = nkf_enc_from_index(WINDOWS_31J);
1905             continue;
1906         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1907             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1908             input_encoding = nkf_enc_from_index(ISO_8859_1);
1909             continue;
1910         case 'i':           /* Kanji IN ESC-$-@/B */
1911             if (*cp=='@'||*cp=='B')
1912                 kanji_intro = *cp++;
1913             continue;
1914         case 'o':           /* ASCII IN ESC-(-J/B */
1915             if (*cp=='J'||*cp=='B'||*cp=='H')
1916                 ascii_intro = *cp++;
1917             continue;
1918         case 'h':
1919             /*
1920                 bit:1   katakana->hiragana
1921                 bit:2   hiragana->katakana
1922             */
1923             if ('9'>= *cp && *cp>='0')
1924                 hira_f |= (*cp++ -'0');
1925             else
1926                 hira_f |= 1;
1927             continue;
1928         case 'r':
1929             rot_f = TRUE;
1930             continue;
1931 #if defined(MSDOS) || defined(__OS2__)
1932         case 'T':
1933             binmode_f = FALSE;
1934             continue;
1935 #endif
1936 #ifndef PERL_XS
1937         case 'V':
1938             show_configuration();
1939             exit(1);
1940             break;
1941         case 'v':
1942             usage();
1943             exit(1);
1944             break;
1945 #endif
1946 #ifdef UTF8_OUTPUT_ENABLE
1947         case 'w':           /* UTF-8 output */
1948             if (cp[0] == '8') {
1949                 cp++;
1950                 if (cp[0] == '0'){
1951                     cp++;
1952                     output_encoding = nkf_enc_from_index(UTF_8N);
1953                 } else {
1954                     output_bom_f = TRUE;
1955                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1956                 }
1957             } else {
1958                 int enc_idx;
1959                 if ('1'== cp[0] && '6'==cp[1]) {
1960                     cp += 2;
1961                     enc_idx = UTF_16;
1962                 } else if ('3'== cp[0] && '2'==cp[1]) {
1963                     cp += 2;
1964                     enc_idx = UTF_32;
1965                 } else {
1966                     output_encoding = nkf_enc_from_index(UTF_8);
1967                     continue;
1968                 }
1969                 if (cp[0]=='L') {
1970                     cp++;
1971                     output_endian = ENDIAN_LITTLE;
1972                 } else if (cp[0] == 'B') {
1973                     cp++;
1974                 } else {
1975                     output_encoding = nkf_enc_from_index(enc_idx);
1976                     continue;
1977                 }
1978                 if (cp[0] == '0'){
1979                     cp++;
1980                     enc_idx = enc_idx == UTF_16
1981                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1982                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1983                 } else {
1984                     output_bom_f = TRUE;
1985                     enc_idx = enc_idx == UTF_16
1986                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1987                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1988                 }
1989                 output_encoding = nkf_enc_from_index(enc_idx);
1990             }
1991             continue;
1992 #endif
1993 #ifdef UTF8_INPUT_ENABLE
1994         case 'W':           /* UTF input */
1995             if (cp[0] == '8') {
1996                 cp++;
1997                 input_encoding = nkf_enc_from_index(UTF_8);
1998             }else{
1999                 int enc_idx;
2000                 if ('1'== cp[0] && '6'==cp[1]) {
2001                     cp += 2;
2002                     input_endian = ENDIAN_BIG;
2003                     enc_idx = UTF_16;
2004                 } else if ('3'== cp[0] && '2'==cp[1]) {
2005                     cp += 2;
2006                     input_endian = ENDIAN_BIG;
2007                     enc_idx = UTF_32;
2008                 } else {
2009                     input_encoding = nkf_enc_from_index(UTF_8);
2010                     continue;
2011                 }
2012                 if (cp[0]=='L') {
2013                     cp++;
2014                     input_endian = ENDIAN_LITTLE;
2015                 } else if (cp[0] == 'B') {
2016                     cp++;
2017                     input_endian = ENDIAN_BIG;
2018                 }
2019                 enc_idx = enc_idx == UTF_16
2020                     ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
2021                     : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
2022                 input_encoding = nkf_enc_from_index(enc_idx);
2023             }
2024             continue;
2025 #endif
2026         /* Input code assumption */
2027         case 'J':   /* ISO-2022-JP input */
2028             input_encoding = nkf_enc_from_index(ISO_2022_JP);
2029             continue;
2030         case 'E':   /* EUC-JP input */
2031             input_encoding = nkf_enc_from_index(EUC_JP);
2032             continue;
2033         case 'S':   /* Windows-31J input */
2034             input_encoding = nkf_enc_from_index(WINDOWS_31J);
2035             continue;
2036         case 'Z':   /* Convert X0208 alphabet to asii */
2037             /* alpha_f
2038                bit:0   Convert JIS X 0208 Alphabet to ASCII
2039                bit:1   Convert Kankaku to one space
2040                bit:2   Convert Kankaku to two spaces
2041                bit:3   Convert HTML Entity
2042                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2043             */
2044             while ('0'<= *cp && *cp <='9') {
2045                 alpha_f |= 1 << (*cp++ - '0');
2046             }
2047             if (!alpha_f) alpha_f = 1;
2048             continue;
2049         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2050             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2051             /* accept  X0201
2052                     ESC-(-I     in JIS, EUC, MS Kanji
2053                     SI/SO       in JIS, EUC, MS Kanji
2054                     SSO         in EUC, JIS, not in MS Kanji
2055                     MS Kanji (0xa0-0xdf)
2056                output  X0201
2057                     ESC-(-I     in JIS (0x20-0x5f)
2058                     SSO         in EUC (0xa0-0xdf)
2059                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2060             */
2061             continue;
2062         case 'X':   /* Convert X0201 kana to X0208 */
2063             x0201_f = TRUE;
2064             continue;
2065         case 'F':   /* prserve new lines */
2066             fold_preserve_f = TRUE;
2067         case 'f':   /* folding -f60 or -f */
2068             fold_f = TRUE;
2069             fold_len = 0;
2070             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2071                 fold_len *= 10;
2072                 fold_len += *cp++ - '0';
2073             }
2074             if (!(0<fold_len && fold_len<BUFSIZ))
2075                 fold_len = DEFAULT_FOLD;
2076             if (*cp=='-') {
2077                 fold_margin = 0;
2078                 cp++;
2079                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2080                     fold_margin *= 10;
2081                     fold_margin += *cp++ - '0';
2082                 }
2083             }
2084             continue;
2085         case 'm':   /* MIME support */
2086             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2087             if (*cp=='B'||*cp=='Q') {
2088                 mime_decode_mode = *cp++;
2089                 mimebuf_f = FIXED_MIME;
2090             } else if (*cp=='N') {
2091                 mime_f = TRUE; cp++;
2092             } else if (*cp=='S') {
2093                 mime_f = STRICT_MIME; cp++;
2094             } else if (*cp=='0') {
2095                 mime_decode_f = FALSE;
2096                 mime_f = FALSE; cp++;
2097             } else {
2098                 mime_f = STRICT_MIME;
2099             }
2100             continue;
2101         case 'M':   /* MIME output */
2102             if (*cp=='B') {
2103                 mimeout_mode = 'B';
2104                 mimeout_f = FIXED_MIME; cp++;
2105             } else if (*cp=='Q') {
2106                 mimeout_mode = 'Q';
2107                 mimeout_f = FIXED_MIME; cp++;
2108             } else {
2109                 mimeout_f = TRUE;
2110             }
2111             continue;
2112         case 'B':   /* Broken JIS support */
2113             /*  bit:0   no ESC JIS
2114                 bit:1   allow any x on ESC-(-x or ESC-$-x
2115                 bit:2   reset to ascii on NL
2116             */
2117             if ('9'>= *cp && *cp>='0')
2118                 broken_f |= 1<<(*cp++ -'0');
2119             else
2120                 broken_f |= TRUE;
2121             continue;
2122 #ifndef PERL_XS
2123         case 'O':/* for Output file */
2124             file_out_f = TRUE;
2125             continue;
2126 #endif
2127         case 'c':/* add cr code */
2128             nlmode_f = CRLF;
2129             continue;
2130         case 'd':/* delete cr code */
2131             nlmode_f = LF;
2132             continue;
2133         case 'I':   /* ISO-2022-JP output */
2134             iso2022jp_f = TRUE;
2135             continue;
2136         case 'L':  /* line mode */
2137             if (*cp=='u') {         /* unix */
2138                 nlmode_f = LF; cp++;
2139             } else if (*cp=='m') { /* mac */
2140                 nlmode_f = CR; cp++;
2141             } else if (*cp=='w') { /* windows */
2142                 nlmode_f = CRLF; cp++;
2143             } else if (*cp=='0') { /* no conversion  */
2144                 nlmode_f = 0; cp++;
2145             }
2146             continue;
2147 #ifndef PERL_XS
2148         case 'g':
2149             if ('2' <= *cp && *cp <= '9') {
2150                 guess_f = 2;
2151                 cp++;
2152             } else if (*cp == '0' || *cp == '1') {
2153                 guess_f = 1;
2154                 cp++;
2155             } else {
2156                 guess_f = 1;
2157             }
2158             continue;
2159 #endif
2160         case SP:
2161         /* module muliple options in a string are allowed for Perl moudle  */
2162             while(*cp && *cp++!='-');
2163             continue;
2164         default:
2165             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2166             /* bogus option but ignored */
2167             continue;
2168         }
2169     }
2170 }
2171
2172 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2173 {
2174     if (iconv_func){
2175         struct input_code *p = input_code_list;
2176         while (p->name){
2177             if (iconv_func == p->iconv_func){
2178                 return p;
2179             }
2180             p++;
2181         }
2182     }
2183     return 0;
2184 }
2185
2186 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2187 {
2188 #ifdef INPUT_CODE_FIX
2189     if (f || !input_encoding)
2190 #endif
2191         if (estab_f != f){
2192             estab_f = f;
2193         }
2194
2195     if (iconv_func
2196 #ifdef INPUT_CODE_FIX
2197         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
2198 #endif
2199         ){
2200         iconv = iconv_func;
2201     }
2202 #ifdef CHECK_OPTION
2203     if (estab_f && iconv_for_check != iconv){
2204         struct input_code *p = find_inputcode_byfunc(iconv);
2205         if (p){
2206             set_input_codename(p->name);
2207             debug(p->name);
2208         }
2209         iconv_for_check = iconv;
2210     }
2211 #endif
2212 }
2213
/*
 * Flag bits OR-ed into input_code.score by set_code_score().
 * Each flag is a single bit, doubling from SCORE_L2 (1) up to
 * SCORE_ERROR (128).
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* reinterpreted via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* nonexistent characters */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score flags indexed by the low nibble of the lead byte; code_score()
 * consults this table when (c2 & 0x70) == 0x20.
 *
 * The element type is unsigned char on purpose: plain char may be
 * signed, and the tables must be able to hold SCORE_ERROR (128)
 * without it turning into a negative value.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Same layout as score_table_A0; code_score() consults this table when
 * (c2 & 0x70) == 0x70.  The last entry is SCORE_ERROR, which is why the
 * element type must not be a signed char.
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2238
2239 void set_code_score(struct input_code *ptr, nkf_char score)
2240 {
2241     if (ptr){
2242         ptr->score |= score;
2243     }
2244 }
2245
2246 void clr_code_score(struct input_code *ptr, nkf_char score)
2247 {
2248     if (ptr){
2249         ptr->score &= ~score;
2250     }
2251 }
2252
2253 void code_score(struct input_code *ptr)
2254 {
2255     nkf_char c2 = ptr->buf[0];
2256 #ifdef UTF8_OUTPUT_ENABLE
2257     nkf_char c1 = ptr->buf[1];
2258 #endif
2259     if (c2 < 0){
2260         set_code_score(ptr, SCORE_ERROR);
2261     }else if (c2 == SSO){
2262         set_code_score(ptr, SCORE_KANA);
2263     }else if (c2 == 0x8f){
2264         set_code_score(ptr, SCORE_X0212);
2265 #ifdef UTF8_OUTPUT_ENABLE
2266     }else if (!e2w_conv(c2, c1)){
2267         set_code_score(ptr, SCORE_NO_EXIST);
2268 #endif
2269     }else if ((c2 & 0x70) == 0x20){
2270         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2271     }else if ((c2 & 0x70) == 0x70){
2272         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2273     }else if ((c2 & 0x70) >= 0x50){
2274         set_code_score(ptr, SCORE_L2);
2275     }
2276 }
2277
2278 void status_disable(struct input_code *ptr)
2279 {
2280     ptr->stat = -1;
2281     ptr->buf[0] = -1;
2282     code_score(ptr);
2283     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2284 }
2285
2286 void status_push_ch(struct input_code *ptr, nkf_char c)
2287 {
2288     ptr->buf[ptr->index++] = c;
2289 }
2290
2291 void status_clear(struct input_code *ptr)
2292 {
2293     ptr->stat = 0;
2294     ptr->index = 0;
2295 }
2296
2297 void status_reset(struct input_code *ptr)
2298 {
2299     status_clear(ptr);
2300     ptr->score = SCORE_INIT;
2301 }
2302
2303 void status_reinit(struct input_code *ptr)
2304 {
2305     status_reset(ptr);
2306     ptr->_file_stat = 0;
2307 }
2308
2309 void status_check(struct input_code *ptr, nkf_char c)
2310 {
2311     if (c <= DEL && estab_f){
2312         status_reset(ptr);
2313     }
2314 }
2315
2316 void s_status(struct input_code *ptr, nkf_char c)
2317 {
2318     switch(ptr->stat){
2319       case -1:
2320           status_check(ptr, c);
2321           break;
2322       case 0:
2323           if (c <= DEL){
2324               break;
2325 #ifdef NUMCHAR_OPTION
2326           }else if (is_unicode_capsule(c)){
2327               break;
2328 #endif
2329           }else if (0xa1 <= c && c <= 0xdf){
2330               status_push_ch(ptr, SSO);
2331               status_push_ch(ptr, c);
2332               code_score(ptr);
2333               status_clear(ptr);
2334           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2335               ptr->stat = 1;
2336               status_push_ch(ptr, c);
2337           }else if (0xed <= c && c <= 0xee){
2338               ptr->stat = 3;
2339               status_push_ch(ptr, c);
2340 #ifdef SHIFTJIS_CP932
2341           }else if (is_ibmext_in_sjis(c)){
2342               ptr->stat = 2;
2343               status_push_ch(ptr, c);
2344 #endif /* SHIFTJIS_CP932 */
2345 #ifdef X0212_ENABLE
2346           }else if (0xf0 <= c && c <= 0xfc){
2347               ptr->stat = 1;
2348               status_push_ch(ptr, c);
2349 #endif /* X0212_ENABLE */
2350           }else{
2351               status_disable(ptr);
2352           }
2353           break;
2354       case 1:
2355           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2356               status_push_ch(ptr, c);
2357               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2358               code_score(ptr);
2359               status_clear(ptr);
2360           }else{
2361               status_disable(ptr);
2362           }
2363           break;
2364       case 2:
2365 #ifdef SHIFTJIS_CP932
2366         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2367             status_push_ch(ptr, c);
2368             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2369                 set_code_score(ptr, SCORE_CP932);
2370                 status_clear(ptr);
2371                 break;
2372             }
2373         }
2374 #endif /* SHIFTJIS_CP932 */
2375         status_disable(ptr);
2376           break;
2377       case 3:
2378           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2379               status_push_ch(ptr, c);
2380               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2381             set_code_score(ptr, SCORE_CP932);
2382             status_clear(ptr);
2383           }else{
2384               status_disable(ptr);
2385           }
2386           break;
2387     }
2388 }
2389
2390 void e_status(struct input_code *ptr, nkf_char c)
2391 {
2392     switch (ptr->stat){
2393       case -1:
2394           status_check(ptr, c);
2395           break;
2396       case 0:
2397           if (c <= DEL){
2398               break;
2399 #ifdef NUMCHAR_OPTION
2400           }else if (is_unicode_capsule(c)){
2401               break;
2402 #endif
2403           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2404               ptr->stat = 1;
2405               status_push_ch(ptr, c);
2406 #ifdef X0212_ENABLE
2407           }else if (0x8f == c){
2408               ptr->stat = 2;
2409               status_push_ch(ptr, c);
2410 #endif /* X0212_ENABLE */
2411           }else{
2412               status_disable(ptr);
2413           }
2414           break;
2415       case 1:
2416           if (0xa1 <= c && c <= 0xfe){
2417               status_push_ch(ptr, c);
2418               code_score(ptr);
2419               status_clear(ptr);
2420           }else{
2421               status_disable(ptr);
2422           }
2423           break;
2424 #ifdef X0212_ENABLE
2425       case 2:
2426           if (0xa1 <= c && c <= 0xfe){
2427               ptr->stat = 1;
2428               status_push_ch(ptr, c);
2429           }else{
2430               status_disable(ptr);
2431           }
2432 #endif /* X0212_ENABLE */
2433     }
2434 }
2435
2436 #ifdef UTF8_INPUT_ENABLE
2437 void w_status(struct input_code *ptr, nkf_char c)
2438 {
2439     switch (ptr->stat){
2440       case -1:
2441           status_check(ptr, c);
2442           break;
2443       case 0:
2444           if (c <= DEL){
2445               break;
2446 #ifdef NUMCHAR_OPTION
2447           }else if (is_unicode_capsule(c)){
2448               break;
2449 #endif
2450           }else if (0xc0 <= c && c <= 0xdf){
2451               ptr->stat = 1;
2452               status_push_ch(ptr, c);
2453           }else if (0xe0 <= c && c <= 0xef){
2454               ptr->stat = 2;
2455               status_push_ch(ptr, c);
2456           }else if (0xf0 <= c && c <= 0xf4){
2457               ptr->stat = 3;
2458               status_push_ch(ptr, c);
2459           }else{
2460               status_disable(ptr);
2461           }
2462           break;
2463       case 1:
2464       case 2:
2465           if (0x80 <= c && c <= 0xbf){
2466               status_push_ch(ptr, c);
2467               if (ptr->index > ptr->stat){
2468                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2469                              && ptr->buf[2] == 0xbf);
2470                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2471                            &ptr->buf[0], &ptr->buf[1]);
2472                   if (!bom){
2473                       code_score(ptr);
2474                   }
2475                   status_clear(ptr);
2476               }
2477           }else{
2478               status_disable(ptr);
2479           }
2480           break;
2481       case 3:
2482         if (0x80 <= c && c <= 0xbf){
2483             if (ptr->index < ptr->stat){
2484                 status_push_ch(ptr, c);
2485             } else {
2486                 status_clear(ptr);
2487             }
2488           }else{
2489               status_disable(ptr);
2490           }
2491           break;
2492     }
2493 }
2494 #endif
2495
2496 void code_status(nkf_char c)
2497 {
2498     int action_flag = 1;
2499     struct input_code *result = 0;
2500     struct input_code *p = input_code_list;
2501     while (p->name){
2502         if (!p->status_func) {
2503             ++p;
2504             continue;
2505         }
2506         if (!p->status_func)
2507             continue;
2508         (p->status_func)(p, c);
2509         if (p->stat > 0){
2510             action_flag = 0;
2511         }else if(p->stat == 0){
2512             if (result){
2513                 action_flag = 0;
2514             }else{
2515                 result = p;
2516             }
2517         }
2518         ++p;
2519     }
2520
2521     if (action_flag){
2522         if (result && !estab_f){
2523             set_iconv(TRUE, result->iconv_func);
2524         }else if (c <= DEL){
2525             struct input_code *ptr = input_code_list;
2526             while (ptr->name){
2527                 status_reset(ptr);
2528                 ++ptr;
2529             }
2530         }
2531     }
2532 }
2533
2534 #ifndef WIN32DLL
2535 nkf_char std_getc(FILE *f)
2536 {
2537     if (std_gc_ndx){
2538         return std_gc_buf[--std_gc_ndx];
2539     }
2540     return getc(f);
2541 }
2542 #endif /*WIN32DLL*/
2543
2544 nkf_char std_ungetc(nkf_char c, FILE *f)
2545 {
2546     if (std_gc_ndx == STD_GC_BUFSIZE){
2547         return EOF;
2548     }
2549     std_gc_buf[std_gc_ndx++] = c;
2550     return c;
2551 }
2552
2553 #ifndef WIN32DLL
2554 void std_putc(nkf_char c)
2555 {
2556     if(c!=EOF)
2557       putchar(c);
2558 }
2559 #endif /*WIN32DLL*/
2560
2561 #if !defined(PERL_XS) && !defined(WIN32DLL)
2562 nkf_char noconvert(FILE *f)
2563 {
2564     nkf_char    c;
2565
2566     if (nop_f == 2)
2567         module_connection();
2568     while ((c = (*i_getc)(f)) != EOF)
2569       (*o_putc)(c);
2570     (*o_putc)(EOF);
2571     return 1;
2572 }
2573 #endif
2574
2575 void module_connection(void)
2576 {
2577     if (input_encoding) set_input_encoding(input_encoding);
2578     if (!output_encoding) {
2579         output_encoding = nkf_default_encoding();
2580     }
2581     set_output_encoding(output_encoding);
2582     oconv = nkf_enc_to_oconv(output_encoding);
2583     o_putc = std_putc;
2584
2585     /* replace continucation module, from output side */
2586
2587     /* output redicrection */
2588 #ifdef CHECK_OPTION
2589     if (noout_f || guess_f){
2590         o_putc = no_putc;
2591     }
2592 #endif
2593     if (mimeout_f) {
2594         o_mputc = o_putc;
2595         o_putc = mime_putc;
2596         if (mimeout_f == TRUE) {
2597             o_base64conv = oconv; oconv = base64_conv;
2598         }
2599         /* base64_count = 0; */
2600     }
2601
2602     if (nlmode_f || guess_f) {
2603         o_nlconv = oconv; oconv = nl_conv;
2604     }
2605     if (rot_f) {
2606         o_rot_conv = oconv; oconv = rot_conv;
2607     }
2608     if (iso2022jp_f) {
2609         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2610     }
2611     if (hira_f) {
2612         o_hira_conv = oconv; oconv = hira_conv;
2613     }
2614     if (fold_f) {
2615         o_fconv = oconv; oconv = fold_conv;
2616         f_line = 0;
2617     }
2618     if (alpha_f || x0201_f) {
2619         o_zconv = oconv; oconv = z_conv;
2620     }
2621
2622     i_getc = std_getc;
2623     i_ungetc = std_ungetc;
2624     /* input redicrection */
2625 #ifdef INPUT_OPTION
2626     if (cap_f){
2627         i_cgetc = i_getc; i_getc = cap_getc;
2628         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2629     }
2630     if (url_f){
2631         i_ugetc = i_getc; i_getc = url_getc;
2632         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2633     }
2634 #endif
2635 #ifdef NUMCHAR_OPTION
2636     if (numchar_f){
2637         i_ngetc = i_getc; i_getc = numchar_getc;
2638         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2639     }
2640 #endif
2641 #ifdef UNICODE_NORMALIZATION
2642     if (nfc_f){
2643         i_nfc_getc = i_getc; i_getc = nfc_getc;
2644         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2645     }
2646 #endif
2647     if (mime_f && mimebuf_f==FIXED_MIME) {
2648         i_mgetc = i_getc; i_getc = mime_getc;
2649         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2650     }
2651     if (broken_f & 1) {
2652         i_bgetc = i_getc; i_getc = broken_getc;
2653         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2654     }
2655     if (input_encoding) {
2656         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
2657     } else {
2658         set_iconv(FALSE, e_iconv);
2659     }
2660
2661     {
2662         struct input_code *p = input_code_list;
2663         while (p->name){
2664             status_reinit(p++);
2665         }
2666     }
2667 }
2668
2669 /*
2670  * Check and Ignore BOM
2671  */
2672 void check_bom(FILE *f)
2673 {
2674     int c2;
2675     switch(c2 = (*i_getc)(f)){
2676     case 0x00:
2677         if((c2 = (*i_getc)(f)) == 0x00){
2678             if((c2 = (*i_getc)(f)) == 0xFE){
2679                 if((c2 = (*i_getc)(f)) == 0xFF){
2680                     if(!input_encoding){
2681                         set_iconv(TRUE, w_iconv32);
2682                     }
2683                     if (iconv == w_iconv32) {
2684                         input_endian = ENDIAN_BIG;
2685                         return;
2686                     }
2687                     (*i_ungetc)(0xFF,f);
2688                 }else (*i_ungetc)(c2,f);
2689                 (*i_ungetc)(0xFE,f);
2690             }else if(c2 == 0xFF){
2691                 if((c2 = (*i_getc)(f)) == 0xFE){
2692                     if(!input_encoding){
2693                         set_iconv(TRUE, w_iconv32);
2694                     }
2695                     if (iconv == w_iconv32) {
2696                         input_endian = ENDIAN_2143;
2697                         return;
2698                     }
2699                     (*i_ungetc)(0xFF,f);
2700                 }else (*i_ungetc)(c2,f);
2701                 (*i_ungetc)(0xFF,f);
2702             }else (*i_ungetc)(c2,f);
2703             (*i_ungetc)(0x00,f);
2704         }else (*i_ungetc)(c2,f);
2705         (*i_ungetc)(0x00,f);
2706         break;
2707     case 0xEF:
2708         if((c2 = (*i_getc)(f)) == 0xBB){
2709             if((c2 = (*i_getc)(f)) == 0xBF){
2710                 if(!input_encoding){
2711                     set_iconv(TRUE, w_iconv);
2712                 }
2713                 if (iconv == w_iconv) {
2714                     return;
2715                 }
2716                 (*i_ungetc)(0xBF,f);
2717             }else (*i_ungetc)(c2,f);
2718             (*i_ungetc)(0xBB,f);
2719         }else (*i_ungetc)(c2,f);
2720         (*i_ungetc)(0xEF,f);
2721         break;
2722     case 0xFE:
2723         if((c2 = (*i_getc)(f)) == 0xFF){
2724             if((c2 = (*i_getc)(f)) == 0x00){
2725                 if((c2 = (*i_getc)(f)) == 0x00){
2726                     if(!input_encoding){
2727                         set_iconv(TRUE, w_iconv32);
2728                     }
2729                     if (iconv == w_iconv32) {
2730                         input_endian = ENDIAN_3412;
2731                         return;
2732                     }
2733                     (*i_ungetc)(0x00,f);
2734                 }else (*i_ungetc)(c2,f);
2735                 (*i_ungetc)(0x00,f);
2736             }else (*i_ungetc)(c2,f);
2737             if(!input_encoding){
2738                 set_iconv(TRUE, w_iconv16);
2739             }
2740             if (iconv == w_iconv16) {
2741                 input_endian = ENDIAN_BIG;
2742                 return;
2743             }
2744             (*i_ungetc)(0xFF,f);
2745         }else (*i_ungetc)(c2,f);
2746         (*i_ungetc)(0xFE,f);
2747         break;
2748     case 0xFF:
2749         if((c2 = (*i_getc)(f)) == 0xFE){
2750             if((c2 = (*i_getc)(f)) == 0x00){
2751                 if((c2 = (*i_getc)(f)) == 0x00){
2752                     if(!input_encoding){
2753                         set_iconv(TRUE, w_iconv32);
2754                     }
2755                     if (iconv == w_iconv32) {
2756                         input_endian = ENDIAN_LITTLE;
2757                         return;
2758                     }
2759                     (*i_ungetc)(0x00,f);
2760                 }else (*i_ungetc)(c2,f);
2761                 (*i_ungetc)(0x00,f);
2762             }else (*i_ungetc)(c2,f);
2763             if(!input_encoding){
2764                 set_iconv(TRUE, w_iconv16);
2765             }
2766             if (iconv == w_iconv16) {
2767                 input_endian = ENDIAN_LITTLE;
2768                 return;
2769             }
2770             (*i_ungetc)(0xFE,f);
2771         }else (*i_ungetc)(c2,f);
2772         (*i_ungetc)(0xFF,f);
2773         break;
2774     default:
2775         (*i_ungetc)(c2,f);
2776         break;
2777     }
2778 }
2779
2780 /*
2781    Conversion main loop. Code detection only.
2782  */
2783
2784 nkf_char kanji_convert(FILE *f)
2785 {
2786     nkf_char    c3, c2=0, c1, c0=0;
2787     int is_8bit = FALSE;
2788
2789     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
2790         is_8bit = TRUE;
2791     }
2792
2793     input_mode = ASCII;
2794     output_mode = ASCII;
2795     shift_mode = FALSE;
2796
2797 #define NEXT continue      /* no output, get next */
2798 #define SEND ;             /* output c1 and c2, get next */
2799 #define LAST break         /* end of loop, go closing  */
2800
2801     module_connection();
2802     check_bom(f);
2803
2804     while ((c1 = (*i_getc)(f)) != EOF) {
2805 #ifdef INPUT_CODE_FIX
2806         if (!input_encoding)
2807 #endif
2808             code_status(c1);
2809         if (c2) {
2810             /* second byte */
2811             if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2812                 /* in case of 8th bit is on */
2813                 if (!estab_f&&!mime_decode_mode) {
2814                     /* in case of not established yet */
2815                     /* It is still ambiguious */
2816                     if (h_conv(f, c2, c1)==EOF)
2817                         LAST;
2818                     else
2819                         c2 = 0;
2820                     NEXT;
2821                 } else {
2822                     /* in case of already established */
2823                     if (c1 < AT) {
2824                         /* ignore bogus code and not CP5022x UCD */
2825                         c2 = 0;
2826                         NEXT;
2827                     } else {
2828                         SEND;
2829                     }
2830                 }
2831             } else
2832                 /* second byte, 7 bit code */
2833                 /* it might be kanji shitfted */
2834                 if ((c1 == DEL) || (c1 <= SP)) {
2835                     /* ignore bogus first code */
2836                     c2 = 0;
2837                     NEXT;
2838                 } else
2839                     SEND;
2840         } else {
2841             /* first byte */
2842 #ifdef UTF8_INPUT_ENABLE
2843             if (iconv == w_iconv16) {
2844                 if (input_endian == ENDIAN_BIG) {
2845                     c2 = c1;
2846                     if ((c1 = (*i_getc)(f)) != EOF) {
2847                         if (0xD8 <= c2 && c2 <= 0xDB) {
2848                             if ((c0 = (*i_getc)(f)) != EOF) {
2849                                 c0 <<= 8;
2850                                 if ((c3 = (*i_getc)(f)) != EOF) {
2851                                     c0 |= c3;
2852                                 } else c2 = EOF;
2853                             } else c2 = EOF;
2854                         }
2855                     } else c2 = EOF;
2856                 } else {
2857                     if ((c2 = (*i_getc)(f)) != EOF) {
2858                         if (0xD8 <= c2 && c2 <= 0xDB) {
2859                             if ((c3 = (*i_getc)(f)) != EOF) {
2860                                 if ((c0 = (*i_getc)(f)) != EOF) {
2861                                     c0 <<= 8;
2862                                     c0 |= c3;
2863                                 } else c2 = EOF;
2864                             } else c2 = EOF;
2865                         }
2866                     } else c2 = EOF;
2867                 }
2868                 SEND;
2869             } else if(iconv == w_iconv32){
2870                 int c3 = c1;
2871                 if((c2 = (*i_getc)(f)) != EOF &&
2872                    (c1 = (*i_getc)(f)) != EOF &&
2873                    (c0 = (*i_getc)(f)) != EOF){
2874                     switch(input_endian){
2875                     case ENDIAN_BIG:
2876                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2877                         break;
2878                     case ENDIAN_LITTLE:
2879                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2880                         break;
2881                     case ENDIAN_2143:
2882                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2883                         break;
2884                     case ENDIAN_3412:
2885                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2886                         break;
2887                     }
2888                     c2 = 0;
2889                 }else{
2890                     c2 = EOF;
2891                 }
2892                 SEND;
2893             } else
2894 #endif
2895 #ifdef NUMCHAR_OPTION
2896             if (is_unicode_capsule(c1)){
2897                 SEND;
2898             } else
2899 #endif
2900             if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2901                 /* 8 bit code */
2902                 if (!estab_f && !iso8859_f) {
2903                     /* not established yet */
2904                     c2 = c1;
2905                     NEXT;
2906                 } else { /* estab_f==TRUE */
2907                     if (iso8859_f) {
2908                         c2 = ISO_8859_1;
2909                         c1 &= 0x7f;
2910                         SEND;
2911                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2912                         /* SJIS X0201 Case... */
2913                         if (iso2022jp_f && !x0201_f) {
2914                             (*oconv)(GETA1, GETA2);
2915                             NEXT;
2916                         } else {
2917                             c2 = JIS_X_0201;
2918                             c1 &= 0x7f;
2919                             SEND;
2920                         }
2921                     } else if (c1==SSO && iconv != s_iconv) {
2922                         /* EUC X0201 Case */
2923                         c1 = (*i_getc)(f);  /* skip SSO */
2924                         code_status(c1);
2925                         if (SSP<=c1 && c1<0xe0) {
2926                             if (iso2022jp_f && !x0201_f) {
2927                                 (*oconv)(GETA1, GETA2);
2928                                 NEXT;
2929                             } else {
2930                                 c2 = JIS_X_0201;
2931                                 c1 &= 0x7f;
2932                                 SEND;
2933                             }
2934                         } else  { /* bogus code, skip SSO and one byte */
2935                             NEXT;
2936                         }
2937                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2938                                (c1 == 0xFD || c1 == 0xFE)) {
2939                         /* CP10001 */
2940                         c2 = JIS_X_0201;
2941                         c1 &= 0x7f;
2942                         SEND;
2943                     } else {
2944                        /* already established */
2945                        c2 = c1;
2946                        NEXT;
2947                     }
2948                 }
2949             } else if ((c1 > SP) && (c1 != DEL)) {
2950                 /* in case of Roman characters */
2951                 if (shift_mode) {
2952                     /* output 1 shifted byte */
2953                     if (iso8859_f) {
2954                         c2 = ISO_8859_1;
2955                         SEND;
2956                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2957                       /* output 1 shifted byte */
2958                         if (iso2022jp_f && !x0201_f) {
2959                             (*oconv)(GETA1, GETA2);
2960                             NEXT;
2961                         } else {
2962                             c2 = JIS_X_0201;
2963                             SEND;
2964                         }
2965                     } else {
2966                         /* look like bogus code */
2967                         NEXT;
2968                     }
2969                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2970                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2971                     /* in case of Kanji shifted */
2972                     c2 = c1;
2973                     NEXT;
2974                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2975                     /* Check MIME code */
2976                     if ((c1 = (*i_getc)(f)) == EOF) {
2977                         (*oconv)(0, '=');
2978                         LAST;
2979                     } else if (c1 == '?') {
2980                         /* =? is mime conversion start sequence */
2981                         if(mime_f == STRICT_MIME) {
2982                             /* check in real detail */
2983                             if (mime_begin_strict(f) == EOF)
2984                                 LAST;
2985                             else
2986                                 NEXT;
2987                         } else if (mime_begin(f) == EOF)
2988                             LAST;
2989                         else
2990                             NEXT;
2991                     } else {
2992                         (*oconv)(0, '=');
2993                         (*i_ungetc)(c1,f);
2994                         NEXT;
2995                     }
2996                 } else {
2997                     /* normal ASCII code */
2998                     SEND;
2999                 }
3000             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
3001                 shift_mode = FALSE;
3002                 NEXT;
3003             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
3004                 shift_mode = TRUE;
3005                 NEXT;
3006             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
3007                 if ((c1 = (*i_getc)(f)) == EOF) {
3008                     /*  (*oconv)(0, ESC); don't send bogus code */
3009                     LAST;
3010                 } else if (c1 == '$') {
3011                     if ((c1 = (*i_getc)(f)) == EOF) {
3012                         /*
3013                         (*oconv)(0, ESC); don't send bogus code
3014                         (*oconv)(0, '$'); */
3015                         LAST;
3016                     } else if (c1 == '@'|| c1 == 'B') {
3017                         /* This is kanji introduction */
3018                         input_mode = JIS_X_0208;
3019                         shift_mode = FALSE;
3020                         set_input_codename("ISO-2022-JP");
3021 #ifdef CHECK_OPTION
3022                         debug("ISO-2022-JP");
3023 #endif
3024                         NEXT;
3025                     } else if (c1 == '(') {
3026                         if ((c1 = (*i_getc)(f)) == EOF) {
3027                             /* don't send bogus code
3028                             (*oconv)(0, ESC);
3029                             (*oconv)(0, '$');
3030                             (*oconv)(0, '(');
3031                                 */
3032                             LAST;
3033                         } else if (c1 == '@'|| c1 == 'B') {
3034                             /* This is kanji introduction */
3035                             input_mode = JIS_X_0208;
3036                             shift_mode = FALSE;
3037                             NEXT;
3038 #ifdef X0212_ENABLE
3039                         } else if (c1 == 'D'){
3040                             input_mode = JIS_X_0212;
3041                             shift_mode = FALSE;
3042                             NEXT;
3043 #endif /* X0212_ENABLE */
3044                         } else if (c1 == 0x4F){
3045                             input_mode = JIS_X_0213_1;
3046                             shift_mode = FALSE;
3047                             NEXT;
3048                         } else if (c1 == 0x50){
3049                             input_mode = JIS_X_0213_2;
3050                             shift_mode = FALSE;
3051                             NEXT;
3052                         } else {
3053                             /* could be some special code */
3054                             (*oconv)(0, ESC);
3055                             (*oconv)(0, '$');
3056                             (*oconv)(0, '(');
3057                             (*oconv)(0, c1);
3058                             NEXT;
3059                         }
3060                     } else if (broken_f&0x2) {
3061                         /* accept any ESC-(-x as broken code ... */
3062                         input_mode = JIS_X_0208;
3063                         shift_mode = FALSE;
3064                         NEXT;
3065                     } else {
3066                         (*oconv)(0, ESC);
3067                         (*oconv)(0, '$');
3068                         (*oconv)(0, c1);
3069                         NEXT;
3070                     }
3071                 } else if (c1 == '(') {
3072                     if ((c1 = (*i_getc)(f)) == EOF) {
3073                         /* don't send bogus code
3074                         (*oconv)(0, ESC);
3075                         (*oconv)(0, '('); */
3076                         LAST;
3077                     } else {
3078                         if (c1 == 'I') {
3079                             /* This is X0201 kana introduction */
3080                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3081                             NEXT;
3082                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3083                             /* This is X0208 kanji introduction */
3084                             input_mode = ASCII; shift_mode = FALSE;
3085                             NEXT;
3086                         } else if (broken_f&0x2) {
3087                             input_mode = ASCII; shift_mode = FALSE;
3088                             NEXT;
3089                         } else {
3090                             (*oconv)(0, ESC);
3091                             (*oconv)(0, '(');
3092                             /* maintain various input_mode here */
3093                             SEND;
3094                         }
3095                     }
3096                } else if ( c1 == 'N' || c1 == 'n'){
3097                    /* SS2 */
3098                    c3 = (*i_getc)(f);  /* skip SS2 */
3099                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3100                        c1 = c3;
3101                        c2 = JIS_X_0201;
3102                        SEND;
3103                    }else{
3104                        (*i_ungetc)(c3, f);
3105                        /* lonely ESC  */
3106                        (*oconv)(0, ESC);
3107                        SEND;
3108                    }
3109                 } else {
3110                     /* lonely ESC  */
3111                     (*oconv)(0, ESC);
3112                     SEND;
3113                 }
3114             } else if (c1 == ESC && iconv == s_iconv) {
3115                 /* ESC in Shift_JIS */
3116                 if ((c1 = (*i_getc)(f)) == EOF) {
3117                     /*  (*oconv)(0, ESC); don't send bogus code */
3118                     LAST;
3119                 } else if (c1 == '$') {
3120                     /* J-PHONE emoji */
3121                     if ((c1 = (*i_getc)(f)) == EOF) {
3122                         /*
3123                            (*oconv)(0, ESC); don't send bogus code
3124                            (*oconv)(0, '$'); */
3125                         LAST;
3126                     } else {
3127                         if (('E' <= c1 && c1 <= 'G') ||
3128                             ('O' <= c1 && c1 <= 'Q')) {
3129                             /*
3130                                NUM : 0 1 2 3 4 5
3131                                BYTE: G E F O P Q
3132                                C%7 : 1 6 0 2 3 4
3133                                C%7 : 0 1 2 3 4 5 6
3134                                NUM : 2 0 3 4 5 X 1
3135                              */
3136                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3137                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3138                             while ((c1 = (*i_getc)(f)) != EOF) {
3139                                 if (SP <= c1 && c1 <= 'z') {
3140                                     (*oconv)(0, c1 + c0);
3141                                 } else break; /* c1 == SO */
3142                             }
3143                         }
3144                     }
3145                     if (c1 == EOF) LAST;
3146                     NEXT;
3147                 } else {
3148                     /* lonely ESC  */
3149                     (*oconv)(0, ESC);
3150                     SEND;
3151                 }
3152             } else if (c1 == LF || c1 == CR) {
3153                 if (broken_f&4) {
3154                     input_mode = ASCII; set_iconv(FALSE, 0);
3155                     SEND;
3156                 } else if (mime_decode_f && !mime_decode_mode){
3157                     if (c1 == LF) {
3158                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3159                             i_ungetc(SP,f);
3160                             continue;
3161                         } else {
3162                             i_ungetc(c1,f);
3163                         }
3164                         c1 = LF;
3165                         SEND;
3166                     } else  { /* if (c1 == CR)*/
3167                         if ((c1=(*i_getc)(f))!=EOF) {
3168                             if (c1==SP) {
3169                                 i_ungetc(SP,f);
3170                                 continue;
3171                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3172                                 i_ungetc(SP,f);
3173                                 continue;
3174                             } else {
3175                                 i_ungetc(c1,f);
3176                             }
3177                             i_ungetc(LF,f);
3178                         } else {
3179                             i_ungetc(c1,f);
3180                         }
3181                         c1 = CR;
3182                         SEND;
3183                     }
3184                 }
3185             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3186                 /* CP5022x */
3187                 c2 = c1;
3188                 NEXT;
3189             } else
3190                 SEND;
3191         }
3192         /* send: */
3193         switch(input_mode){
3194         case ASCII:
3195             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3196             case -2:
3197                 /* 4 bytes UTF-8 */
3198                 if ((c0 = (*i_getc)(f)) != EOF) {
3199                     code_status(c0);
3200                     c0 <<= 8;
3201                     if ((c3 = (*i_getc)(f)) != EOF) {
3202                         code_status(c3);
3203                         (*iconv)(c2, c1, c0|c3);
3204                     }
3205                 }
3206                 break;
3207             case -1:
3208                 /* 3 bytes EUC or UTF-8 */
3209                 if ((c0 = (*i_getc)(f)) != EOF) {
3210                     code_status(c0);
3211                     (*iconv)(c2, c1, c0);
3212                 }
3213                 break;
3214             }
3215             break;
3216         case JIS_X_0208:
3217         case JIS_X_0213_1:
3218             if (ms_ucs_map_f &&
3219                 0x7F <= c2 && c2 <= 0x92 &&
3220                 0x21 <= c1 && c1 <= 0x7E) {
3221                 /* CP932 UDC */
3222                 if(c1 == 0x7F) return 0;
3223                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3224                 c2 = 0;
3225             }
3226             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3227             break;
3228 #ifdef X0212_ENABLE
3229         case JIS_X_0212:
3230             (*oconv)(PREFIX_EUCG3 | c2, c1);
3231             break;
3232 #endif /* X0212_ENABLE */
3233         case JIS_X_0213_2:
3234             (*oconv)(PREFIX_EUCG3 | c2, c1);
3235             break;
3236         default:
3237             (*oconv)(input_mode, c1);  /* other special case */
3238         }
3239
3240         c2 = 0;
3241         c0 = 0;
3242         continue;
3243         /* goto next_word */
3244     }
3245
3246     /* epilogue */
3247     (*iconv)(EOF, 0, 0);
3248     if (!input_codename)
3249     {
3250         if (is_8bit) {
3251             struct input_code *p = input_code_list;
3252             struct input_code *result = p;
3253             while (p->name){
3254                 if (p->score < result->score) result = p;
3255                 ++p;
3256             }
3257             set_input_codename(result->name);
3258 #ifdef CHECK_OPTION
3259             debug(result->name);
3260 #endif
3261         }
3262     }
3263     return 1;
3264 }
3265
3266 nkf_char
3267 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3268 {
3269     nkf_char ret, c3, c0;
3270     int hold_index;
3271
3272
3273     /** it must NOT be in the kanji shifte sequence      */
3274     /** it must NOT be written in JIS7                   */
3275     /** and it must be after 2 byte 8bit code            */
3276
3277     hold_count = 0;
3278     push_hold_buf(c2);
3279     push_hold_buf(c1);
3280
3281     while ((c1 = (*i_getc)(f)) != EOF) {
3282         if (c1 == ESC){
3283             (*i_ungetc)(c1,f);
3284             break;
3285         }
3286         code_status(c1);
3287         if (push_hold_buf(c1) == EOF || estab_f){
3288             break;
3289         }
3290     }
3291
3292     if (!estab_f){
3293         struct input_code *p = input_code_list;
3294         struct input_code *result = p;
3295         if (c1 == EOF){
3296             code_status(c1);
3297         }
3298         while (p->name){
3299             if (p->status_func && p->score < result->score){
3300                 result = p;
3301             }
3302             ++p;
3303         }
3304         set_iconv(TRUE, result->iconv_func);
3305     }
3306
3307
3308     /** now,
3309      ** 1) EOF is detected, or
3310      ** 2) Code is established, or
3311      ** 3) Buffer is FULL (but last word is pushed)
3312      **
3313      ** in 1) and 3) cases, we continue to use
3314      ** Kanji codes by oconv and leave estab_f unchanged.
3315      **/
3316
3317     ret = c1;
3318     hold_index = 0;
3319     while (hold_index < hold_count){
3320         c2 = hold_buf[hold_index++];
3321         if (c2 <= DEL
3322 #ifdef NUMCHAR_OPTION
3323             || is_unicode_capsule(c2)
3324 #endif
3325             ){
3326             (*iconv)(0, c2, 0);
3327             continue;
3328         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3329             (*iconv)(JIS_X_0201, c2, 0);
3330             continue;
3331         }
3332         if (hold_index < hold_count){
3333             c1 = hold_buf[hold_index++];
3334         }else{
3335             c1 = (*i_getc)(f);
3336             if (c1 == EOF){
3337                 c3 = EOF;
3338                 break;
3339             }
3340             code_status(c1);
3341         }
3342         c0 = 0;
3343         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3344         case -2:
3345             /* 4 bytes UTF-8 */
3346             if (hold_index < hold_count){
3347                 c0 = hold_buf[hold_index++];
3348             } else if ((c0 = (*i_getc)(f)) == EOF) {
3349                 ret = EOF;
3350                 break;
3351             } else {
3352                 code_status(c0);
3353                 c0 <<= 8;
3354                 if (hold_index < hold_count){
3355                     c3 = hold_buf[hold_index++];
3356                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3357                     c0 = ret = EOF;
3358                     break;
3359                 } else {
3360                     code_status(c3);
3361                     (*iconv)(c2, c1, c0|c3);
3362                 }
3363             }
3364             break;
3365         case -1:
3366             /* 3 bytes EUC or UTF-8 */
3367             if (hold_index < hold_count){
3368                 c0 = hold_buf[hold_index++];
3369             } else if ((c0 = (*i_getc)(f)) == EOF) {
3370                 ret = EOF;
3371                 break;
3372             } else {
3373                 code_status(c0);
3374             }
3375             (*iconv)(c2, c1, c0);
3376             break;
3377         }
3378         if (c0 == EOF) break;
3379     }
3380     return ret;
3381 }
3382
3383 nkf_char push_hold_buf(nkf_char c2)
3384 {
3385     if (hold_count >= HOLD_SIZE*2)
3386         return (EOF);
3387     hold_buf[hold_count++] = (unsigned char)c2;
3388     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3389 }
3390
3391 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3392 {
3393 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3394     nkf_char val;
3395 #endif
3396     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3397 #ifdef SHIFTJIS_CP932
3398     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3399         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3400         if (val){
3401             c2 = val >> 8;
3402             c1 = val & 0xff;
3403         }
3404     }
3405     if (cp932inv_f
3406         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3407         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3408         if (c){
3409             c2 = c >> 8;
3410             c1 = c & 0xff;
3411         }
3412     }
3413 #endif /* SHIFTJIS_CP932 */
3414 #ifdef X0212_ENABLE
3415     if (!x0213_f && is_ibmext_in_sjis(c2)){
3416         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3417         if (val){
3418             if (val > 0x7FFF){
3419                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3420                 c1 = val & 0xff;
3421             }else{
3422                 c2 = val >> 8;
3423                 c1 = val & 0xff;
3424             }
3425             if (p2) *p2 = c2;
3426             if (p1) *p1 = c1;
3427             return 0;
3428         }
3429     }
3430 #endif
3431     if(c2 >= 0x80){
3432         if(x0213_f && c2 >= 0xF0){
3433             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3434                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3435             }else{ /* 78<=k<=94 */
3436                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3437                 if (0x9E < c1) c2++;
3438             }
3439         }else{
3440             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3441             if (0x9E < c1) c2++;
3442         }
3443         if (c1 < 0x9F)
3444             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3445         else {
3446             c1 = c1 - 0x7E;
3447         }
3448     }
3449
3450 #ifdef X0212_ENABLE
3451     c2 = x0212_unshift(c2);
3452 #endif
3453     if (p2) *p2 = c2;
3454     if (p1) *p1 = c1;
3455     return 0;
3456 }
3457
3458 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3459 {
3460     if (c2 == JIS_X_0201) {
3461         c1 &= 0x7f;
3462     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3463         /* NOP */
3464     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3465         /* CP932 UDC */
3466         if(c1 == 0x7F) return 0;
3467         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3468         c2 = 0;
3469     } else {
3470         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3471         if (ret) return ret;
3472     }
3473     (*oconv)(c2, c1);
3474     return 0;
3475 }
3476
3477 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3478 {
3479     if (c2 == JIS_X_0201) {
3480         c1 &= 0x7f;
3481 #ifdef X0212_ENABLE
3482     }else if (c2 == 0x8f){
3483         if (c0 == 0){
3484             return -1;
3485         }
3486         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3487             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3488             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3489             c2 = 0;
3490         } else {
3491             c2 = (c2 << 8) | (c1 & 0x7f);
3492             c1 = c0 & 0x7f;
3493 #ifdef SHIFTJIS_CP932
3494             if (cp51932_f){
3495                 nkf_char s2, s1;
3496                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3497                     s2e_conv(s2, s1, &c2, &c1);
3498                     if (c2 < 0x100){
3499                         c1 &= 0x7f;
3500                         c2 &= 0x7f;
3501                     }
3502                 }
3503             }
3504 #endif /* SHIFTJIS_CP932 */
3505         }
3506 #endif /* X0212_ENABLE */
3507     } else if (c2 == SSO){
3508         c2 = JIS_X_0201;
3509         c1 &= 0x7f;
3510     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3511         /* NOP */
3512     } else {
3513         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3514             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3515             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3516             c2 = 0;
3517         } else {
3518             c1 &= 0x7f;
3519             c2 &= 0x7f;
3520 #ifdef SHIFTJIS_CP932
3521             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3522                 nkf_char s2, s1;
3523                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3524                     s2e_conv(s2, s1, &c2, &c1);
3525                     if (c2 < 0x100){
3526                         c1 &= 0x7f;
3527                         c2 &= 0x7f;
3528                     }
3529                 }
3530             }
3531 #endif /* SHIFTJIS_CP932 */
3532         }
3533     }
3534     (*oconv)(c2, c1);
3535     return 0;
3536 }
3537
3538 #ifdef UTF8_INPUT_ENABLE
3539 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3540 {
3541     nkf_char ret = 0;
3542
3543     if (!c1){
3544         *p2 = 0;
3545         *p1 = c2;
3546     }else if (0xc0 <= c2 && c2 <= 0xef) {
3547         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3548 #ifdef NUMCHAR_OPTION
3549         if (ret > 0){
3550             if (p2) *p2 = 0;
3551             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3552             ret = 0;
3553         }
3554 #endif
3555     }
3556     return ret;
3557 }
3558
3559 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3560 {
3561     nkf_char ret = 0;
3562     static const char w_iconv_utf8_1st_byte[] =
3563     { /* 0xC0 - 0xFF */
3564         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3565         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3566         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3567         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3568
3569     if (c2 < 0 || 0xff < c2) {
3570     }else if (c2 == 0) { /* 0 : 1 byte*/
3571         c0 = 0;
3572     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3573         return 0;
3574     } else{
3575         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3576         case 21:
3577             if (c1 < 0x80 || 0xBF < c1) return 0;
3578             break;
3579         case 30:
3580             if (c0 == 0) return -1;
3581             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3582                 return 0;
3583             break;
3584         case 31:
3585         case 33:
3586             if (c0 == 0) return -1;
3587             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3588                 return 0;
3589             break;
3590         case 32:
3591             if (c0 == 0) return -1;
3592             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3593                 return 0;
3594             break;
3595         case 40:
3596             if (c0 == 0) return -2;
3597             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3598                 return 0;
3599             break;
3600         case 41:
3601             if (c0 == 0) return -2;
3602             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3603                 return 0;
3604             break;
3605         case 42:
3606             if (c0 == 0) return -2;
3607             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3608                 return 0;
3609             break;
3610         default:
3611             return 0;
3612             break;
3613         }
3614     }
3615     if (c2 == 0 || c2 == EOF){
3616     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3617         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3618         c2 = 0;
3619     } else {
3620         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3621     }
3622     if (ret == 0){
3623         (*oconv)(c2, c1);
3624     }
3625     return ret;
3626 }
3627 #endif
3628
3629 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3630 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3631 {
3632     val &= VALUE_MASK;
3633     if (val < 0x80){
3634         *p2 = val;
3635         *p1 = 0;
3636         *p0 = 0;