OSDN Git Service

* -s is now alias of Windows-31J.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.162 2008/01/01 14:21:20 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-01-02"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
/*
 * Newline emission helpers, selected by DEFAULT_NEWLINE (presumably set by
 * the build configuration; an undefined macro evaluates to 0 in #if):
 *   0x0D0A -> CR then LF
 *   0x0D   -> CR
 *   other  -> LF (DEFAULT_NEWLINE is then forced to 0x0A)
 *
 * PUT_NEWLINE(func)   invokes func(byte) once per newline byte.
 * OCONV_NEWLINE(func) invokes func(0, byte) once per newline byte.
 */
#if DEFAULT_NEWLINE == 0x0D0A
#define PUT_NEWLINE(func) do {\
    func(0x0D);\
    func(0x0A);\
} while (0)
#define OCONV_NEWLINE(func) do {\
    func(0, 0x0D);\
    func(0, 0x0A);\
} while (0)
#elif DEFAULT_NEWLINE == 0x0D
#define PUT_NEWLINE(func) func(0x0D)
#define OCONV_NEWLINE(func) func(0, 0x0D)
#else
/* Drop any earlier (unrecognised) definition first: redefining a macro with
 * a different replacement list is a constraint violation. */
#undef DEFAULT_NEWLINE
#define DEFAULT_NEWLINE 0x0A
#define PUT_NEWLINE(func) func(0x0A)
#define OCONV_NEWLINE(func) func(0, 0x0A)
#endif
75 #ifdef HELP_OUTPUT_STDERR
76 #define HELP_OUTPUT stderr
77 #else
78 #define HELP_OUTPUT stdout
79 #endif
80
81 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
82 #define MSDOS
83 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
84 #define __WIN32__
85 #endif
86 #endif
87
88 #ifdef PERL_XS
89 #undef OVERWRITE
90 #endif
91
92 #ifndef PERL_XS
93 #include <stdio.h>
94 #endif
95
96 #include <stdlib.h>
97 #include <string.h>
98
99 #if defined(MSDOS) || defined(__OS2__)
100 #include <fcntl.h>
101 #include <io.h>
102 #if defined(_MSC_VER) || defined(__WATCOMC__)
103 #define mktemp _mktemp
104 #endif
105 #endif
106
107 #ifdef MSDOS
108 #ifdef LSI_C
109 #define setbinmode(fp) fsetbin(fp)
110 #elif defined(__DJGPP__)
111 #include <libc/dosio.h>
112 #define setbinmode(fp) djgpp_setbinmode(fp)
113 #else /* Microsoft C, Turbo C */
114 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
115 #endif
116 #else /* UNIX */
117 #define setbinmode(fp)
118 #endif
119
120 #if defined(__DJGPP__)
121 void  djgpp_setbinmode(FILE *fp)
122 {
123     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
124     int fd, m;
125     fd = fileno(fp);
126     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
127     __file_handle_set(fd, m);
128 }
129 #endif
130
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
133 #else /* BSD */
134 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
135 #endif
136
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
139 #define         EASYWIN
140 #ifndef __WIN16__
141 #define __WIN16__
142 #endif
143 #include <windows.h>
144 #endif
145
146 #ifdef OVERWRITE
147 /* added by satoru@isoternet.org */
148 #if defined(__EMX__)
149 #include <sys/types.h>
150 #endif
151 #include <sys/stat.h>
152 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
153 #include <unistd.h>
154 #if defined(__WATCOMC__)
155 #include <sys/utime.h>
156 #else
157 #include <utime.h>
158 #endif
159 #else /* defined(MSDOS) */
160 #ifdef __WIN32__
161 #ifdef __BORLANDC__ /* BCC32 */
162 #include <utime.h>
163 #else /* !defined(__BORLANDC__) */
164 #include <sys/utime.h>
165 #endif /* (__BORLANDC__) */
166 #else /* !defined(__WIN32__) */
167 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
168 #include <sys/utime.h>
169 #elif defined(__TURBOC__) /* BCC */
170 #include <utime.h>
171 #elif defined(LSI_C) /* LSI C */
172 #endif /* (__WIN32__) */
173 #endif
174 #endif
175 #endif
176
177 #define         FALSE   0
178 #define         TRUE    1
179
180 /* state of output_mode and input_mode
181
182    c2           0 means ASCII
183                 JIS_X_0201
184                 ISO_8859_1
185                 JIS_X_0208
186                 EOF      all termination
187    c1           32bit data
188
189  */
190
191 /* Input Assumption */
192
193 #define         JIS_INPUT       4
194 #define         EUC_INPUT      16
195 #define         SJIS_INPUT      5
196 #define         LATIN1_INPUT    6
197 #define         UTF8_INPUT     13
198 #define         UTF16_INPUT    1015
199 #define         UTF32_INPUT    1017
200
201 #define         FIXED_MIME      7
202 #define         STRICT_MIME     8
203
204 /* MIME ENCODE */
205
206
207 /* byte order */
/*
 * Byte-order selectors; numbering starts at 1 so that 0 is never a valid
 * order.  ENDIAN_2143 / ENDIAN_3412 presumably name the mixed 4-byte
 * permutations -- confirm against the UTF-32 reader.
 */
enum byte_order {
    ENDIAN_BIG = 1,
    ENDIAN_LITTLE,      /* 2 */
    ENDIAN_2143,        /* 3 */
    ENDIAN_3412         /* 4 */
};
214
/* ASCII control characters and other byte constants, in ascending order */

#define         BS      0x08    /* backspace */
#define         TAB     0x09    /* horizontal tab */
#define         LF      0x0A    /* line feed */
#define         CR      0x0D    /* carriage return */
#define         SO      0x0E    /* shift out */
#define         SI      0x0F    /* shift in */
#define         ESC     0x1B    /* escape */
#define         SP      0x20    /* space */
#define         AT      0x40    /* '@' */
#define         DEL     0x7F    /* delete */
#define         SSO     0x8E
#define         SS3     0x8F
#define         SSP     0xA0
#define         CRLF    0x0D0A  /* CR and LF packed into one value */
231
232
233 /* encodings */
234
/*
 * Encoding identifiers.
 *
 * The first group is numbered consecutively from 0; the character-set
 * identifiers start at 0x1000, and BINARY takes the value following the
 * last of them.
 */
enum nkf_encodings {
    /* single-byte */
    ASCII = 0,
    ISO_8859_1,

    /* ISO-2022-JP family */
    ISO_2022_JP,
    CP50220,
    CP50221,
    CP50222,
    ISO_2022_JP_1,
    ISO_2022_JP_3,

    /* Shift_JIS family */
    SHIFT_JIS,
    WINDOWS_31J,
    CP10001,

    /* EUC-JP family */
    EUC_JP,
    CP51932,
    EUCJP_MS,
    EUCJP_ASCII,

    /* JIS X 0213 based variants */
    SHIFT_JISX0213,
    SHIFT_JIS_2004,
    EUC_JISX0213,
    EUC_JIS_2004,

    /* UTF-8 */
    UTF_8,
    UTF_8N,
    UTF_8_BOM,
    UTF8_MAC,

    /* UTF-16 */
    UTF_16,
    UTF_16BE,
    UTF_16BE_BOM,
    UTF_16LE,
    UTF_16LE_BOM,

    /* UTF-32 */
    UTF_32,
    UTF_32BE,
    UTF_32BE_BOM,
    UTF_32LE,
    UTF_32LE_BOM,

    /* character sets, numbered from 0x1000 */
    JIS_X_0201 = 0x1000,
    JIS_X_0208,         /* 0x1001 */
    JIS_X_0212,         /* 0x1002 */
    JIS_X_0213_1,       /* 0x1003 */
    JIS_X_0213_2,       /* 0x1004 */

    BINARY              /* 0x1005 */
};
276
277 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
278 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
279 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
280 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
281 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
282 void j_oconv(nkf_char c2, nkf_char c1);
283 void s_oconv(nkf_char c2, nkf_char c1);
284 void e_oconv(nkf_char c2, nkf_char c1);
285 void w_oconv(nkf_char c2, nkf_char c1);
286 void w_oconv16(nkf_char c2, nkf_char c1);
287 void w_oconv32(nkf_char c2, nkf_char c1);
288
/*
 * A "native" encoding: a display name plus the pair of conversion callbacks
 * that handle it.  iconv takes three nkf_char values (c2, c1, c0) and
 * returns an nkf_char; oconv takes two (c2, c1) and returns nothing.
 */
289 typedef struct {
290     const char *name;
291     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
292     void (*oconv)(nkf_char c2, nkf_char c1);
293 } nkf_native_encoding;
294
/*
 * The native encodings.  ASCII and ISO-2022-JP both use e_iconv on the input
 * side; ASCII additionally shares e_oconv with EUC-JP.
 * NOTE(review): the ASCII entry is named "US_ASCII" (underscore) while
 * nkf_encoding_table uses "ASCII" -- confirm this is intended.
 */
295 nkf_native_encoding NkfEncodingASCII =          { "US_ASCII", e_iconv, e_oconv };
296 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
297 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
298 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
299 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
300 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
301 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
302
/*
 * One supported encoding: its enum nkf_encodings id, its canonical name and
 * the native encoding whose iconv/oconv callbacks it is based on.
 */
303 typedef struct {
304     const int id;
305     const char *name;
306     const nkf_native_encoding *base_encoding;
307 } nkf_encoding;
/*
 * Table of all encodings.  The rows are listed in the same order as the
 * leading members of enum nkf_encodings (ASCII .. UTF_32LE_BOM), followed by
 * BINARY and a terminating {-1, NULL, NULL} sentinel.
 */
308 nkf_encoding nkf_encoding_table[] = {
309     {ASCII,             "ASCII",                &NkfEncodingASCII},
310     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
311     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
312     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
313     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
314     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
315     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
316     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
317     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
318     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
319     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
320     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
321     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
322     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
323     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
324     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
325     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
326     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
327     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
328     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
329     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
330     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
331     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
332     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
333     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
334     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
335     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
336     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
337     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
338     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
339     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
340     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
341     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
342     {BINARY,            "BINARY",               &NkfEncodingASCII},
343     {-1,                NULL,                   NULL}
344 };
/* Number of real rows above, not counting the sentinel; this is maintained
 * by hand and must be updated whenever a row is added or removed. */
345 #define NKF_ENCODING_TABLE_SIZE 34
/*
 * Maps encoding names and aliases to enum nkf_encodings ids.  Several names
 * may share one id (e.g. "CP932" and "MS932" both give WINDOWS_31J).  The
 * list ends with a {NULL, -1} sentinel.
 * Every name is spelled in upper case -- presumably the lookup upper-cases
 * its argument before comparing; TODO confirm against the caller.
 * NOTE(review): there is no alias for ISO_8859_1 here although it has a row
 * in nkf_encoding_table -- confirm whether that is deliberate.
 */
346 struct {
347     const char *name;
348     const int id;
349 } encoding_name_to_id_table[] = {
350     {"ASCII",                   ASCII},
351     {"ISO-2022-JP",             ISO_2022_JP},
352     {"X-ISO2022JP-CP932",       CP50220},
353     {"CP50220",                 CP50220},
354     {"CP50221",                 CP50221},
355     {"CP50222",                 CP50222},
356     {"ISO-2022-JP-1",           ISO_2022_JP_1},
357     {"ISO-2022-JP-3",           ISO_2022_JP_3},
358     {"SHIFT_JIS",               SHIFT_JIS},
359     {"SJIS",                    SHIFT_JIS},
360     {"WINDOWS-31J",             WINDOWS_31J},
361     {"CSWINDOWS31J",            WINDOWS_31J},
362     {"CP932",                   WINDOWS_31J},
363     {"MS932",                   WINDOWS_31J},
364     {"CP10001",                 CP10001},
365     {"EUCJP",                   EUC_JP},
366     {"EUC-JP",                  EUC_JP},
367     {"CP51932",                 CP51932},
368     {"EUC-JP-MS",               EUCJP_MS},
369     {"EUCJP-MS",                EUCJP_MS},
370     {"EUCJPMS",                 EUCJP_MS},
371     {"EUC-JP-ASCII",            EUCJP_ASCII},
372     {"EUCJP-ASCII",             EUCJP_ASCII},
373     {"SHIFT_JISX0213",          SHIFT_JISX0213},
374     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
375     {"EUC-JISX0213",            EUC_JISX0213},
376     {"EUC-JIS-2004",            EUC_JIS_2004},
377     {"UTF-8",                   UTF_8},
378     {"UTF-8N",                  UTF_8N},
379     {"UTF-8-BOM",               UTF_8_BOM},
380     {"UTF8-MAC",                UTF8_MAC},
381     {"UTF-8-MAC",               UTF8_MAC},
382     {"UTF-16",                  UTF_16},
383     {"UTF-16BE",                UTF_16BE},
384     {"UTF-16BE-BOM",            UTF_16BE_BOM},
385     {"UTF-16LE",                UTF_16LE},
386     {"UTF-16LE-BOM",            UTF_16LE_BOM},
387     {"UTF-32",                  UTF_32},
388     {"UTF-32BE",                UTF_32BE},
389     {"UTF-32BE-BOM",            UTF_32BE_BOM},
390     {"UTF-32LE",                UTF_32LE},
391     {"UTF-32LE-BOM",            UTF_32LE_BOM},
392     {"BINARY",                  BINARY},
393     {NULL,                      -1}
394 };
395 #if defined(DEFAULT_CODE_JIS)
396 #define     DEFAULT_ENCODING ISO_2022_JP
397 #elif defined(DEFAULT_CODE_SJIS)
398 #define     DEFAULT_ENCODING SHIFT_JIS
399 #elif defined(DEFAULT_CODE_EUC)
400 #define     DEFAULT_ENCODING EUC_JP
401 #elif defined(DEFAULT_CODE_UTF8)
402 #define     DEFAULT_ENCODING UTF_8
403 #endif
404
405
/*
 * Locale-independent ASCII character classification and conversion macros.
 *
 * Every use of a macro parameter is parenthesised so that an argument such
 * as `a | b` or `x ? y : z` is treated as a single operand.  Arguments are
 * still evaluated more than once, so do not pass expressions with side
 * effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF and for characters strictly between SP and DEL other than
 * '?', '=', '_', '(', ')', '.' and '"' (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
428
/* Inclusive byte ranges used for CP932 handling.  CP932_TABLE_BEGIN..END is
 * the range tested by is_ibmext_in_sjis(); the CP932INV pair is presumably
 * the range of the inverse mapping table -- confirm at its point of use. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END (inclusive).
 * The argument is parenthesised so compound expressions are tested as one
 * value; it is evaluated twice. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
434
435 #define         HOLD_SIZE       1024
436 #if defined(INT_IS_SHORT)
437 #define         IOBUF_SIZE      2048
438 #else
439 #define         IOBUF_SIZE      16384
440 #endif
441
442 #define         DEFAULT_J       'B'
443 #define         DEFAULT_R       'B'
444
445 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
446 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
447
448 #define         RANGE_NUM_MAX   18
449 #define         GETA1   0x22
450 #define         GETA2   0x2e
451
452
453 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
454 #define sizeof_euc_to_utf8_1byte 94
455 #define sizeof_euc_to_utf8_2bytes 94
456 #define sizeof_utf8_to_euc_C2 64
457 #define sizeof_utf8_to_euc_E5B8 64
458 #define sizeof_utf8_to_euc_2bytes 112
459 #define sizeof_utf8_to_euc_3bytes 16
460 #endif
461
462 /* MIME preprocessor */
463
464 #ifdef EASYWIN /*Easy Win */
465 extern POINT _BufferSize;
466 #endif
467
/*
 * Per-candidate state for one input encoding (see input_code_list).
 *   name        - encoding name, e.g. "EUC-JP"
 *   stat, score, index, buf
 *               - working state; presumably maintained by the status_* and
 *                 *_code_score helpers declared in this file (TODO confirm)
 *   status_func - per-character status callback; may be NULL
 *   iconv_func  - input converter associated with this encoding
 *   _file_stat  - NOTE(review): purpose not visible here
 */
468 struct input_code{
469     char *name;
470     nkf_char stat;
471     nkf_char score;
472     nkf_char index;
473     nkf_char buf[3];
474     void (*status_func)(struct input_code *, nkf_char);
475     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
476     int _file_stat;
477 };
478
479 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
480 static nkf_encoding *input_encoding = NULL;
481 static nkf_encoding *output_encoding = NULL;
482
483 #if !defined(PERL_XS) && !defined(WIN32DLL)
484 static  nkf_char     noconvert(FILE *f);
485 #endif
486 static  void    module_connection(void);
487 static  nkf_char     kanji_convert(FILE *f);
488 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
489 static  nkf_char     push_hold_buf(nkf_char c2);
490 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
491 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
492 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
493 /* UCS Mapping
494  * 0: Shift_JIS, eucJP-ascii
495  * 1: eucJP-ms
496  * 2: CP932, CP51932
497  * 3: CP10001
498  */
499 #define UCS_MAP_ASCII   0
500 #define UCS_MAP_MS      1
501 #define UCS_MAP_CP932   2
502 #define UCS_MAP_CP10001 3
503 static int ms_ucs_map_f = UCS_MAP_ASCII;
504 #endif
505 #ifdef UTF8_INPUT_ENABLE
506 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
507 static  int     no_cp932ext_f = FALSE;
508 /* ignore ZERO WIDTH NO-BREAK SPACE */
509 static  int     no_best_fit_chars_f = FALSE;
510 static  int     input_endian = ENDIAN_BIG;
511 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
512 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
513 static  void    encode_fallback_html(nkf_char c);
514 static  void    encode_fallback_xml(nkf_char c);
515 static  void    encode_fallback_java(nkf_char c);
516 static  void    encode_fallback_perl(nkf_char c);
517 static  void    encode_fallback_subchar(nkf_char c);
518 static  void    (*encode_fallback)(nkf_char c) = NULL;
519 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
520 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
521 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
522 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
523 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
524 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
525 static  void    w_status(struct input_code *, nkf_char);
526 #endif
527 #ifdef UTF8_OUTPUT_ENABLE
528 static  int     output_bom_f = FALSE;
529 static  int     output_endian = ENDIAN_BIG;
530 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
531 #endif
532 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
533 static  void    fold_conv(nkf_char c2,nkf_char c1);
534 static  void    nl_conv(nkf_char c2,nkf_char c1);
535 static  void    z_conv(nkf_char c2,nkf_char c1);
536 static  void    rot_conv(nkf_char c2,nkf_char c1);
537 static  void    hira_conv(nkf_char c2,nkf_char c1);
538 static  void    base64_conv(nkf_char c2,nkf_char c1);
539 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
540 static  void    no_connection(nkf_char c2,nkf_char c1);
541 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
542
543 static  void    code_score(struct input_code *ptr);
544 static  void    code_status(nkf_char c);
545
546 static  void    std_putc(nkf_char c);
547 static  nkf_char     std_getc(FILE *f);
548 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
549
550 static  nkf_char     broken_getc(FILE *f);
551 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
552
553 static  nkf_char     mime_begin(FILE *f);
554 static  nkf_char     mime_getc(FILE *f);
555 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
556
557 static  void    switch_mime_getc(void);
558 static  void    unswitch_mime_getc(void);
559 static  nkf_char     mime_begin_strict(FILE *f);
560 static  nkf_char     mime_getc_buf(FILE *f);
561 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
562 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
563
564 static  nkf_char     base64decode(nkf_char c);
565 static  void    mime_prechar(nkf_char c2, nkf_char c1);
566 static  void    mime_putc(nkf_char c);
567 static  void    open_mime(nkf_char c);
568 static  void    close_mime(void);
569 static  void    eof_mime(void);
570 static  void    mimeout_addchar(nkf_char c);
571 #ifndef PERL_XS
572 static  void    usage(void);
573 static  void    version(void);
574 static  void    show_configuration(void);
575 #endif
576 static  void    options(unsigned char *c);
577 static  void    reinit(void);
578
579 /* buffers */
580
581 #if !defined(PERL_XS) && !defined(WIN32DLL)
582 static unsigned char   stdibuf[IOBUF_SIZE];
583 static unsigned char   stdobuf[IOBUF_SIZE];
584 #endif
585 static unsigned char   hold_buf[HOLD_SIZE*2];
586 static int             hold_count = 0;
587
588 /* MIME preprocessor fifo */
589
590 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
591 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
592 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
593 static unsigned char           mime_buf[MIME_BUF_SIZE];
594 static unsigned int            mime_top = 0;
595 static unsigned int            mime_last = 0;  /* decoded */
596 static unsigned int            mime_input = 0; /* undecoded */
597 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
598
599 /* flags */
/* Conversion option flags and their initial values. */
600 static int             unbuf_f = FALSE;
601 static int             estab_f = FALSE;
602 static int             nop_f = FALSE;
603 static int             binmode_f = TRUE;       /* binary mode */
604 static int             rot_f = FALSE;          /* rot14/43 mode -- NOTE(review): likely a typo for rot13/47; confirm */
605 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
606 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
607 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
608 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
609 static int             mimebuf_f = FALSE;      /* MIME buffered input */
610 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
611 static int             iso8859_f = FALSE;      /* ISO8859 through */
612 static int             mimeout_f = FALSE;       /* base64 mode */
613 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
614 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
615
616 #ifdef UNICODE_NORMALIZATION
617 static int nfc_f = FALSE;
618 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
619 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
620 static nkf_char nfc_getc(FILE *f);
621 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
622 #endif
623
624 #ifdef INPUT_OPTION
625 static int cap_f = FALSE;
626 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
627 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
628 static nkf_char cap_getc(FILE *f);
629 static nkf_char cap_ungetc(nkf_char c,FILE *f);
630
631 static int url_f = FALSE;
632 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
633 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
634 static nkf_char url_getc(FILE *f);
635 static nkf_char url_ungetc(nkf_char c,FILE *f);
636 #endif
637
/*
 * NKF_INT32_C(n) spells an integer constant that needs at least 32 bits:
 * when INT_IS_SHORT is defined an L suffix is appended, otherwise the
 * constant is used as written.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/*
 * A value whose top byte (CLASS_MASK) equals CLASS_UNICODE is treated as a
 * "Unicode capsule"; its low 24 bits (VALUE_MASK) carry the payload.
 */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* The argument is parenthesised so that expressions such as
 * `CLASS_UNICODE | cp` are masked as a whole rather than being split by
 * operator precedence. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
650
651 #ifdef NUMCHAR_OPTION
652 static int numchar_f = FALSE;
653 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
654 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
655 static nkf_char numchar_getc(FILE *f);
656 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
657 #endif
658
659 #ifdef CHECK_OPTION
660 static int noout_f = FALSE;
661 static void no_putc(nkf_char c);
662 static int debug_f = FALSE;
663 static void debug(const char *str);
664 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
665 #endif
666
667 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
668 #if !defined PERL_XS
669 static  void    print_guessed_code(char *filename);
670 #endif
671 static  void    set_input_codename(char *codename);
672
673 #ifdef EXEC_IO
674 static int exec_f = 0;
675 #endif
676
677 #ifdef SHIFTJIS_CP932
678 /* invert IBM extended characters to others */
679 static int cp51932_f = FALSE;
680
681 /* invert NEC-selected IBM extended characters to IBM extended characters */
682 static int cp932inv_f = TRUE;
683
684 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
685 #endif /* SHIFTJIS_CP932 */
686
687 #ifdef X0212_ENABLE
688 static int x0212_f = FALSE;
689 static nkf_char x0212_shift(nkf_char c);
690 static nkf_char x0212_unshift(nkf_char c);
691 #endif
692 static int x0213_f = FALSE;
693
694 static unsigned char prefix_table[256];
695
696 static void set_code_score(struct input_code *ptr, nkf_char score);
697 static void clr_code_score(struct input_code *ptr, nkf_char score);
698 static void status_disable(struct input_code *ptr);
699 static void status_push_ch(struct input_code *ptr, nkf_char c);
700 static void status_clear(struct input_code *ptr);
701 static void status_reset(struct input_code *ptr);
702 static void status_reinit(struct input_code *ptr);
703 static void status_check(struct input_code *ptr, nkf_char c);
704 static void e_status(struct input_code *, nkf_char);
705 static void s_status(struct input_code *, nkf_char);
706
/*
 * Candidate input encodings, each starting with zeroed state.  The UTF-8,
 * UTF-16 and UTF-32 rows exist only when UTF8_INPUT_ENABLE is defined, and
 * the UTF-16/UTF-32 rows have no status callback (NULL).  The list is
 * terminated by an all-zero entry, i.e. one whose name is NULL.
 */
707 struct input_code input_code_list[] = {
708     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
709     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
710 #ifdef UTF8_INPUT_ENABLE
711     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
712     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
713     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
714 #endif
715     {0}
716 };
717
718 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
719 static int              base64_count = 0;
720
721 /* X0208 -> ASCII converter */
722
723 /* fold parameter */
724 static int             f_line = 0;    /* chars in line */
725 static int             f_prev = 0;
726 static int             fold_preserve_f = FALSE; /* preserve new lines */
727 static int             fold_f  = FALSE;
728 static int             fold_len  = 0;
729
730 /* options */
731 static unsigned char   kanji_intro = DEFAULT_J;
732 static unsigned char   ascii_intro = DEFAULT_R;
733
734 /* Folding */
735
736 #define FOLD_MARGIN  10
737 #define DEFAULT_FOLD 60
738
739 static int             fold_margin  = FOLD_MARGIN;
740
741 /* converters */
742
743 #ifdef DEFAULT_CODE_JIS
744 #   define  DEFAULT_CONV j_oconv
745 #endif
746 #ifdef DEFAULT_CODE_SJIS
747 #   define  DEFAULT_CONV s_oconv
748 #endif
749 #ifdef DEFAULT_CODE_EUC
750 #   define  DEFAULT_CONV e_oconv
751 #endif
752 #ifdef DEFAULT_CODE_UTF8
753 #   define  DEFAULT_CONV w_oconv
754 #endif
755
756 /* process default */
757 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
758 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
759
760 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
761 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
762 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
763 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
764 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
765 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
766 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
767
768 /* static redirections */
769
770 static  void   (*o_putc)(nkf_char c) = std_putc;
771
772 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
773 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
774
775 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
776 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
777
778 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
779
780 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
781 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
782
783 /* for strict mime */
784 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
785 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
786
787 /* Global states */
788 static int output_mode = ASCII,    /* output kanji mode */
789            input_mode =  ASCII,    /* input kanji mode */
790            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
791 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
792
/* X0201 / X0208 conversion tables */

/*
 * X0201 kana conversion table.
 * Original annotation: "90-9F A0-DF".
 * The data is laid out as two-byte pairs (four pairs per row below),
 * closed by a 0x00,0x00 pair; 130 bytes in total.
 */
static const unsigned char cv[] = {
    0x21,0x21,  0x21,0x23,  0x21,0x56,  0x21,0x57,
    0x21,0x22,  0x21,0x26,  0x25,0x72,  0x25,0x21,
    0x25,0x23,  0x25,0x25,  0x25,0x27,  0x25,0x29,
    0x25,0x63,  0x25,0x65,  0x25,0x67,  0x25,0x43,
    0x21,0x3c,  0x25,0x22,  0x25,0x24,  0x25,0x26,
    0x25,0x28,  0x25,0x2a,  0x25,0x2b,  0x25,0x2d,
    0x25,0x2f,  0x25,0x31,  0x25,0x33,  0x25,0x35,
    0x25,0x37,  0x25,0x39,  0x25,0x3b,  0x25,0x3d,
    0x25,0x3f,  0x25,0x41,  0x25,0x44,  0x25,0x46,
    0x25,0x48,  0x25,0x4a,  0x25,0x4b,  0x25,0x4c,
    0x25,0x4d,  0x25,0x4e,  0x25,0x4f,  0x25,0x52,
    0x25,0x55,  0x25,0x58,  0x25,0x5b,  0x25,0x5e,
    0x25,0x5f,  0x25,0x60,  0x25,0x61,  0x25,0x62,
    0x25,0x64,  0x25,0x66,  0x25,0x68,  0x25,0x69,
    0x25,0x6a,  0x25,0x6b,  0x25,0x6c,  0x25,0x6d,
    0x25,0x6f,  0x25,0x73,  0x21,0x2b,  0x21,0x2c,
    0x00,0x00
};
815
816
/*
 * X0201 kana conversion table for daguten (spelling as in the original).
 * Original annotation: "90-9F A0-DF".
 * Same two-byte-pair layout and length (130 bytes) as cv; most pairs are
 * 0x00,0x00.
 */
static const unsigned char dv[] = {
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x25,0x74,
    0x00,0x00,  0x00,0x00,  0x25,0x2c,  0x25,0x2e,
    0x25,0x30,  0x25,0x32,  0x25,0x34,  0x25,0x36,
    0x25,0x38,  0x25,0x3a,  0x25,0x3c,  0x25,0x3e,
    0x25,0x40,  0x25,0x42,  0x25,0x45,  0x25,0x47,
    0x25,0x49,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x25,0x50,  0x25,0x53,
    0x25,0x56,  0x25,0x59,  0x25,0x5c,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00
};
837
/*
 * X0201 kana conversion table for han-daguten (spelling as in the original).
 * Original annotation: "90-9F A0-DF".
 * Same two-byte-pair layout and length (130 bytes) as cv; only five pairs
 * are non-zero.
 */
static const unsigned char ev[] = {
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x25,0x51,  0x25,0x54,
    0x25,0x57,  0x25,0x5a,  0x25,0x5d,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    0x00,0x00
};
858
859
/*
 * X0208 kigou conversion table.
 * Original annotation: "0x8140 - 0x819e".
 * 96 single-byte entries; the comment at the start of each row is the
 * array offset of its first entry.
 */
static const unsigned char fv[] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x2c, 0x2e, 0x00, 0x3a,
    /* 0x08 */ 0x3b, 0x3f, 0x21, 0x00, 0x00, 0x27, 0x60, 0x00,
    /* 0x10 */ 0x5e, 0x00, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x2f,
    /* 0x20 */ 0x5c, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x60, 0x27,
    /* 0x28 */ 0x22, 0x22, 0x28, 0x29, 0x00, 0x00, 0x5b, 0x5d,
    /* 0x30 */ 0x7b, 0x7d, 0x3c, 0x3e, 0x00, 0x00, 0x00, 0x00,
    /* 0x38 */ 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2d, 0x00, 0x00,
    /* 0x40 */ 0x00, 0x3d, 0x00, 0x3c, 0x3e, 0x00, 0x00, 0x00,
    /* 0x48 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x50 */ 0x24, 0x00, 0x00, 0x25, 0x23, 0x26, 0x2a, 0x40,
    /* 0x58 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
877
878
879
880 static int             file_out_f = FALSE;
881 #ifdef OVERWRITE
882 static int             overwrite_f = FALSE;
883 static int             preserve_time_f = FALSE;
884 static int             backup_f = FALSE;
885 static char            *backup_suffix = "";
886 static char *get_backup_filename(const char *suffix, const char *filename);
887 #endif
888
889 static int nlmode_f = 0;   /* CR, LF, CRLF */
890 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
891 static nkf_char prev_cr = 0; /* CR or 0 */
892 #ifdef EASYWIN /*Easy Win */
893 static int             end_check;
894 #endif /*Easy Win */
895
896 #define STD_GC_BUFSIZE (256)
897 nkf_char std_gc_buf[STD_GC_BUFSIZE];
898 nkf_char std_gc_ndx;
899
/*
 * Duplicate a NUL-terminated string on the heap.
 *
 * str: string to copy (must not be NULL).
 *
 * Returns the malloc'ed copy.  If the allocation fails, the failure is
 * reported through perror() and the string literal "" is returned instead,
 * so the result is never NULL (but in that case it is not heap memory).
 */
char* nkf_strcpy(const char *str)
{
    char *copy = malloc(strlen(str) + 1);

    if (copy != NULL) {
        return strcpy(copy, str);
    }
    perror(str);
    return "";
}
910
/*
 * Copy src into dest, converting each character with nkf_toupper().
 *
 * src:    NUL-terminated input string.
 * dest:   output buffer.
 * length: size of dest in bytes, including room for the terminating NUL
 *         (options() calls this with a 32-byte buffer and length 32).
 *
 * At most length - 1 characters are copied and dest is always
 * NUL-terminated, so an over-long src is truncated rather than writing one
 * byte past the end of dest.  Nothing is written when length is 0.
 */
static void nkf_str_upcase(const char *src, char *dest, size_t length)
{
    size_t i = 0;

    if (length == 0) {
        return;
    }
    /* stop one short of length so the terminator stays inside dest */
    for (; i + 1 < length && src[i]; i++) {
        dest[i] = nkf_toupper(src[i]);
    }
    dest[i] = 0;
}
919
920 static nkf_encoding *nkf_enc_from_index(int idx)
921 {
922     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
923         return 0;
924     }
925     return &nkf_encoding_table[idx];
926 }
927
928 static int nkf_enc_find_index(const char *name)
929 {
930     int i, index = -1;
931     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
932         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
933             return encoding_name_to_id_table[i].id;
934         }
935     }
936     return index;
937 }
938
939 static nkf_encoding *nkf_enc_find(const char *name)
940 {
941     int idx = -1;
942     idx = nkf_enc_find_index(name);
943     if (idx < 0) return 0;
944     return nkf_enc_from_index(idx);
945 }
946
/*
 * Accessors and predicates for an encoding descriptor pointer `enc`.
 * The predicates expand `enc` more than once, so pass an expression
 * without side effects.
 */
#define nkf_enc_name(enc)               ((enc)->name)
#define nkf_enc_to_index(enc)           ((enc)->id)
#define nkf_enc_to_base_encoding(enc)   ((enc)->base_encoding)
#define nkf_enc_to_iconv(enc)           (nkf_enc_to_base_encoding(enc)->iconv)
#define nkf_enc_to_oconv(enc)           (nkf_enc_to_base_encoding(enc)->oconv)

/* true when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP */
#define nkf_enc_asciicompat(enc) \
    (nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII \
     || nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)

/* true when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 bases */
#define nkf_enc_unicode_p(enc) \
    (nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 \
     || nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 \
     || nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)

/* true when the encoding id is CP50220, CP50221 or CP50222 */
#define nkf_enc_cp5022x_p(enc) \
    (nkf_enc_to_index(enc) == CP50220 \
     || nkf_enc_to_index(enc) == CP50221 \
     || nkf_enc_to_index(enc) == CP50222)
963
964 #ifdef WIN32DLL
965 #include "nkf32dll.c"
966 #elif defined(PERL_XS)
967 #else /* WIN32DLL */
968 int main(int argc, char **argv)
969 {
970     FILE  *fin;
971     unsigned char  *cp;
972
973     char *outfname = NULL;
974     char *origfname;
975
976 #ifdef EASYWIN /*Easy Win */
977     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
978 #endif
979
980     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
981         cp = (unsigned char *)*argv;
982         options(cp);
983         if (guess_f) {
984 #ifdef CHECK_OPTION
985             int debug_f_back = debug_f;
986 #endif
987 #ifdef EXEC_IO
988             int exec_f_back = exec_f;
989 #endif
990 #ifdef X0212_ENABLE
991             int x0212_f_back = x0212_f;
992 #endif
993             int x0213_f_back = x0213_f;
994             int guess_f_back = guess_f;
995             reinit();
996             guess_f = guess_f_back;
997             mime_f = FALSE;
998 #ifdef CHECK_OPTION
999             debug_f = debug_f_back;
1000 #endif
1001 #ifdef EXEC_IO
1002             exec_f = exec_f_back;
1003 #endif
1004 #ifdef X0212_ENABLE
1005             x0212_f = x0212_f_back;
1006 #endif
1007             x0213_f = x0213_f_back;
1008         }
1009 #ifdef EXEC_IO
1010         if (exec_f){
1011             int fds[2], pid;
1012             if (pipe(fds) < 0 || (pid = fork()) < 0){
1013                 abort();
1014             }
1015             if (pid == 0){
1016                 if (exec_f > 0){
1017                     close(fds[0]);
1018                     dup2(fds[1], 1);
1019                 }else{
1020                     close(fds[1]);
1021                     dup2(fds[0], 0);
1022                 }
1023                 execvp(argv[1], &argv[1]);
1024             }
1025             if (exec_f > 0){
1026                 close(fds[1]);
1027                 dup2(fds[0], 0);
1028             }else{
1029                 close(fds[0]);
1030                 dup2(fds[1], 1);
1031             }
1032             argc = 0;
1033             break;
1034         }
1035 #endif
1036     }
1037
1038     if (binmode_f == TRUE)
1039 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1040     if (freopen("","wb",stdout) == NULL)
1041         return (-1);
1042 #else
1043     setbinmode(stdout);
1044 #endif
1045
1046     if (unbuf_f)
1047       setbuf(stdout, (char *) NULL);
1048     else
1049       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1050
1051     if (argc == 0) {
1052       if (binmode_f == TRUE)
1053 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1054       if (freopen("","rb",stdin) == NULL) return (-1);
1055 #else
1056       setbinmode(stdin);
1057 #endif
1058       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1059       if (nop_f)
1060           noconvert(stdin);
1061       else {
1062           kanji_convert(stdin);
1063           if (guess_f) print_guessed_code(NULL);
1064       }
1065     } else {
1066       int nfiles = argc;
1067         int is_argument_error = FALSE;
1068       while (argc--) {
1069             input_codename = NULL;
1070             input_newline = 0;
1071 #ifdef CHECK_OPTION
1072             iconv_for_check = 0;
1073 #endif
1074           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1075                 perror(*(argv-1));
1076                 is_argument_error = TRUE;
1077                 continue;
1078           } else {
1079 #ifdef OVERWRITE
1080               int fd = 0;
1081               int fd_backup = 0;
1082 #endif
1083
1084 /* reopen file for stdout */
1085               if (file_out_f == TRUE) {
1086 #ifdef OVERWRITE
1087                   if (overwrite_f){
1088                       outfname = malloc(strlen(origfname)
1089                                         + strlen(".nkftmpXXXXXX")
1090                                         + 1);
1091                       if (!outfname){
1092                           perror(origfname);
1093                           return -1;
1094                       }
1095                       strcpy(outfname, origfname);
1096 #ifdef MSDOS
1097                       {
1098                           int i;
1099                           for (i = strlen(outfname); i; --i){
1100                               if (outfname[i - 1] == '/'
1101                                   || outfname[i - 1] == '\\'){
1102                                   break;
1103                               }
1104                           }
1105                           outfname[i] = '\0';
1106                       }
1107                       strcat(outfname, "ntXXXXXX");
1108                       mktemp(outfname);
1109                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1110                                 S_IREAD | S_IWRITE);
1111 #else
1112                       strcat(outfname, ".nkftmpXXXXXX");
1113                       fd = mkstemp(outfname);
1114 #endif
1115                       if (fd < 0
1116                           || (fd_backup = dup(fileno(stdout))) < 0
1117                           || dup2(fd, fileno(stdout)) < 0
1118                           ){
1119                           perror(origfname);
1120                           return -1;
1121                       }
1122                   }else
1123 #endif
1124                   if(argc == 1) {
1125                       outfname = *argv++;
1126                       argc--;
1127                   } else {
1128                       outfname = "nkf.out";
1129                   }
1130
1131                   if(freopen(outfname, "w", stdout) == NULL) {
1132                       perror (outfname);
1133                       return (-1);
1134                   }
1135                   if (binmode_f == TRUE) {
1136 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1137                       if (freopen("","wb",stdout) == NULL)
1138                            return (-1);
1139 #else
1140                       setbinmode(stdout);
1141 #endif
1142                   }
1143               }
1144               if (binmode_f == TRUE)
1145 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1146                  if (freopen("","rb",fin) == NULL)
1147                     return (-1);
1148 #else
1149                  setbinmode(fin);
1150 #endif
1151               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1152               if (nop_f)
1153                   noconvert(fin);
1154               else {
1155                   char *filename = NULL;
1156                   kanji_convert(fin);
1157                   if (nfiles > 1) filename = origfname;
1158                   if (guess_f) print_guessed_code(filename);
1159               }
1160               fclose(fin);
1161 #ifdef OVERWRITE
1162               if (overwrite_f) {
1163                   struct stat     sb;
1164 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1165                   time_t tb[2];
1166 #else
1167                   struct utimbuf  tb;
1168 #endif
1169
1170                   fflush(stdout);
1171                   close(fd);
1172                   if (dup2(fd_backup, fileno(stdout)) < 0){
1173                       perror("dup2");
1174                   }
1175                   if (stat(origfname, &sb)) {
1176                       fprintf(stderr, "Can't stat %s\n", origfname);
1177                   }
1178                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1179                   if (chmod(outfname, sb.st_mode)) {
1180                       fprintf(stderr, "Can't set permission %s\n", outfname);
1181                   }
1182
1183                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1184                     if(preserve_time_f){
1185 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1186                         tb[0] = tb[1] = sb.st_mtime;
1187                         if (utime(outfname, tb)) {
1188                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1189                         }
1190 #else
1191                         tb.actime  = sb.st_atime;
1192                         tb.modtime = sb.st_mtime;
1193                         if (utime(outfname, &tb)) {
1194                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1195                         }
1196 #endif
1197                     }
1198                     if(backup_f){
1199                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1200 #ifdef MSDOS
1201                         unlink(backup_filename);
1202 #endif
1203                         if (rename(origfname, backup_filename)) {
1204                             perror(backup_filename);
1205                             fprintf(stderr, "Can't rename %s to %s\n",
1206                                     origfname, backup_filename);
1207                         }
1208                     }else{
1209 #ifdef MSDOS
1210                         if (unlink(origfname)){
1211                             perror(origfname);
1212                         }
1213 #endif
1214                     }
1215                   if (rename(outfname, origfname)) {
1216                       perror(origfname);
1217                       fprintf(stderr, "Can't rename %s to %s\n",
1218                               outfname, origfname);
1219                   }
1220                   free(outfname);
1221               }
1222 #endif
1223           }
1224       }
1225         if (is_argument_error)
1226             return(-1);
1227     }
1228 #ifdef EASYWIN /*Easy Win */
1229     if (file_out_f == FALSE)
1230         scanf("%d",&end_check);
1231     else
1232         fclose(stdout);
1233 #else /* for Other OS */
1234     if (file_out_f == TRUE)
1235         fclose(stdout);
1236 #endif /*Easy Win */
1237     return (0);
1238 }
1239 #endif /* WIN32DLL */
1240
1241 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * suffix:   backup pattern.  If it contains one or more '*', the result is
 *           the pattern with every '*' replaced by `filename`; otherwise
 *           the result is `filename` followed by `suffix`.
 * filename: name of the file being backed up.
 *
 * Returns a malloc'ed string that the caller must free, or NULL (after
 * reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t i, j;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t result_length;

    for (i = 0; suffix[i]; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* every one-byte '*' grows into filename_length bytes */
        result_length = suffix_length - asterisk_count
                        + asterisk_count * filename_length;
    } else {
        result_length = filename_length + suffix_length;
    }

    backup_filename = malloc(result_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; suffix[i]; i++) {
            if (suffix[i] == '*') {
                strcpy(backup_filename + j, filename);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        strcpy(backup_filename, filename);
        strcpy(backup_filename + filename_length, suffix);
    }
    return backup_filename;
}
1280 #endif
1281
/*
 * Long ("--name") options.
 *
 * name:  option text after the leading "--"; a trailing '=' marks an
 *        option that takes a value.
 * alias: short-option string that options() processes in place of the
 *        long option; "" means options() handles the name itself.
 *
 * options() scans this table from the top and stops at the first match,
 * so the order of the entries matters.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",                "" },
    { "oc=",                "" },
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess=",             "" },
    { "guess",              "g2" },
    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/* Set to 1 by options() on a bare "--"; later calls then return at once. */
static int option_mode = 0;
1363
1364 void options(unsigned char *cp)
1365 {
1366     nkf_char i, j;
1367     unsigned char *p;
1368     unsigned char *cp_back = NULL;
1369     char codeset[32];
1370     nkf_encoding *enc;
1371
1372     if (option_mode==1)
1373         return;
1374     while(*cp && *cp++!='-');
1375     while (*cp || cp_back) {
1376         if(!*cp){
1377             cp = cp_back;
1378             cp_back = NULL;
1379             continue;
1380         }
1381         p = 0;
1382         switch (*cp++) {
1383         case '-':  /* literal options */
1384             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1385                 option_mode = 1;
1386                 return;
1387             }
1388             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1389                 p = (unsigned char *)long_option[i].name;
1390                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1391                 if (*p == cp[j] || cp[j] == SP){
1392                     p = &cp[j] + 1;
1393                     break;
1394                 }
1395                 p = 0;
1396             }
1397             if (p == 0) {
1398                 fprintf(stderr, "unknown long option: --%s\n", cp);
1399                 return;
1400             }
1401             while(*cp && *cp != SP && cp++);
1402             if (long_option[i].alias[0]){
1403                 cp_back = cp;
1404                 cp = (unsigned char *)long_option[i].alias;
1405             }else{
1406                 if (strcmp(long_option[i].name, "ic=") == 0){
1407                     nkf_str_upcase((char *)p, codeset, 32);
1408                     enc = nkf_enc_find(codeset);
1409                     if (!enc) continue;
1410                     input_encoding = enc;
1411                     switch (nkf_enc_to_index(input_encoding)) {
1412                     case CP50220:
1413                     case CP50221:
1414                     case CP50222:
1415 #ifdef SHIFTJIS_CP932
1416                         cp51932_f = TRUE;
1417 #endif
1418 #ifdef UTF8_OUTPUT_ENABLE
1419                         ms_ucs_map_f = UCS_MAP_CP932;
1420 #endif
1421                         break;
1422                     case ISO_2022_JP_1:
1423 #ifdef X0212_ENABLE
1424                         x0212_f = TRUE;
1425 #endif
1426                         break;
1427                     case ISO_2022_JP_3:
1428 #ifdef X0212_ENABLE
1429                         x0212_f = TRUE;
1430 #endif
1431                         x0213_f = TRUE;
1432                         break;
1433                     case WINDOWS_31J:
1434 #ifdef SHIFTJIS_CP932
1435                         cp51932_f = TRUE;
1436 #endif
1437 #ifdef UTF8_OUTPUT_ENABLE
1438                         ms_ucs_map_f = UCS_MAP_CP932;
1439 #endif
1440                         break;
1441                     case CP10001:
1442 #ifdef SHIFTJIS_CP932
1443                         cp51932_f = TRUE;
1444 #endif
1445 #ifdef UTF8_OUTPUT_ENABLE
1446                         ms_ucs_map_f = UCS_MAP_CP10001;
1447 #endif
1448                         break;
1449                     case CP51932:
1450 #ifdef SHIFTJIS_CP932
1451                         cp51932_f = TRUE;
1452 #endif
1453 #ifdef UTF8_OUTPUT_ENABLE
1454                         ms_ucs_map_f = UCS_MAP_CP932;
1455 #endif
1456                         break;
1457                     case EUCJP_MS:
1458 #ifdef SHIFTJIS_CP932
1459                         cp51932_f = FALSE;
1460 #endif
1461 #ifdef UTF8_OUTPUT_ENABLE
1462                         ms_ucs_map_f = UCS_MAP_MS;
1463 #endif
1464                         break;
1465                     case EUCJP_ASCII:
1466 #ifdef SHIFTJIS_CP932
1467                         cp51932_f = FALSE;
1468 #endif
1469 #ifdef UTF8_OUTPUT_ENABLE
1470                         ms_ucs_map_f = UCS_MAP_ASCII;
1471 #endif
1472                         break;
1473                     case SHIFT_JISX0213:
1474                     case SHIFT_JIS_2004:
1475                         x0213_f = TRUE;
1476 #ifdef SHIFTJIS_CP932
1477                         cp51932_f = FALSE;
1478 #endif
1479                         break;
1480                     case EUC_JISX0213:
1481                     case EUC_JIS_2004:
1482                         x0213_f = TRUE;
1483 #ifdef SHIFTJIS_CP932
1484                         cp51932_f = FALSE;
1485 #endif
1486                         break;
1487 #ifdef UTF8_INPUT_ENABLE
1488 #ifdef UNICODE_NORMALIZATION
1489                     case UTF8_MAC:
1490                         nfc_f = TRUE;
1491                         break;
1492 #endif
1493                     case UTF_16:
1494                     case UTF_16BE:
1495                     case UTF_16BE_BOM:
1496                         input_endian = ENDIAN_BIG;
1497                         break;
1498                     case UTF_16LE:
1499                     case UTF_16LE_BOM:
1500                         input_endian = ENDIAN_LITTLE;
1501                         break;
1502                     case UTF_32:
1503                     case UTF_32BE:
1504                     case UTF_32BE_BOM:
1505                         input_endian = ENDIAN_BIG;
1506                         break;
1507                     case UTF_32LE:
1508                     case UTF_32LE_BOM:
1509                         input_endian = ENDIAN_LITTLE;
1510                         break;
1511 #endif
1512                     }
1513                     continue;
1514                 }
1515                 if (strcmp(long_option[i].name, "oc=") == 0){
1516                     x0201_f = FALSE;
1517                     nkf_str_upcase((char *)p, codeset, 32);
1518                     enc = nkf_enc_find(codeset);
1519                     if (enc <= 0) continue;
1520                     output_encoding = enc;
1521                     switch (nkf_enc_to_index(output_encoding)) {
1522                     case CP50220:
1523                         x0201_f = TRUE;
1524 #ifdef SHIFTJIS_CP932
1525                         cp932inv_f = FALSE;
1526 #endif
1527 #ifdef UTF8_OUTPUT_ENABLE
1528                         ms_ucs_map_f = UCS_MAP_CP932;
1529 #endif
1530                         break;
1531                     case CP50221:
1532 #ifdef SHIFTJIS_CP932
1533                         cp932inv_f = FALSE;
1534 #endif
1535 #ifdef UTF8_OUTPUT_ENABLE
1536                         ms_ucs_map_f = UCS_MAP_CP932;
1537 #endif
1538                         break;
1539                     case ISO_2022_JP_1:
1540 #ifdef X0212_ENABLE
1541                         x0212_f = TRUE;
1542 #endif
1543 #ifdef SHIFTJIS_CP932
1544                         cp932inv_f = FALSE;
1545 #endif
1546                         break;
1547                     case ISO_2022_JP_3:
1548 #ifdef X0212_ENABLE
1549                         x0212_f = TRUE;
1550 #endif
1551                         x0213_f = TRUE;
1552 #ifdef SHIFTJIS_CP932
1553                         cp932inv_f = FALSE;
1554 #endif
1555                         break;
1556                     case WINDOWS_31J:
1557 #ifdef UTF8_OUTPUT_ENABLE
1558                         ms_ucs_map_f = UCS_MAP_CP932;
1559 #endif
1560                         break;
1561                     case CP10001:
1562 #ifdef UTF8_OUTPUT_ENABLE
1563                         ms_ucs_map_f = UCS_MAP_CP10001;
1564 #endif
1565                         break;
1566                     case CP51932:
1567 #ifdef SHIFTJIS_CP932
1568                         cp932inv_f = FALSE;
1569 #endif
1570 #ifdef UTF8_OUTPUT_ENABLE
1571                         ms_ucs_map_f = UCS_MAP_CP932;
1572 #endif
1573                         break;
1574                     case EUCJP_MS:
1575 #ifdef X0212_ENABLE
1576                         x0212_f = TRUE;
1577 #endif
1578 #ifdef UTF8_OUTPUT_ENABLE
1579                         ms_ucs_map_f = UCS_MAP_MS;
1580 #endif
1581                         break;
1582                     case EUCJP_ASCII:
1583 #ifdef X0212_ENABLE
1584                         x0212_f = TRUE;
1585 #endif
1586 #ifdef UTF8_OUTPUT_ENABLE
1587                         ms_ucs_map_f = UCS_MAP_ASCII;
1588 #endif
1589                         break;
1590                     case SHIFT_JISX0213:
1591                     case SHIFT_JIS_2004:
1592                         x0213_f = TRUE;
1593 #ifdef SHIFTJIS_CP932
1594                         cp932inv_f = FALSE;
1595 #endif
1596                         break;
1597                     case EUC_JISX0213:
1598                     case EUC_JIS_2004:
1599 #ifdef X0212_ENABLE
1600                         x0212_f = TRUE;
1601 #endif
1602                         x0213_f = TRUE;
1603 #ifdef SHIFTJIS_CP932
1604                         cp932inv_f = FALSE;
1605 #endif
1606                         break;
1607 #ifdef UTF8_OUTPUT_ENABLE
1608                     case UTF_8_BOM:
1609                         output_bom_f = TRUE;
1610                         break;
1611                     case UTF_16:
1612                     case UTF_16BE_BOM:
1613                         output_bom_f = TRUE;
1614                         break;
1615                     case UTF_16LE:
1616                         output_endian = ENDIAN_LITTLE;
1617                         output_bom_f = FALSE;
1618                         break;
1619                     case UTF_16LE_BOM:
1620                         output_endian = ENDIAN_LITTLE;
1621                         output_bom_f = TRUE;
1622                         break;
1623                     case UTF_32BE_BOM:
1624                         output_bom_f = TRUE;
1625                         break;
1626                     case UTF_32LE:
1627                         output_endian = ENDIAN_LITTLE;
1628                         output_bom_f = FALSE;
1629                         break;
1630                     case UTF_32LE_BOM:
1631                         output_endian = ENDIAN_LITTLE;
1632                         output_bom_f = TRUE;
1633                         break;
1634 #endif
1635                     }
1636                     continue;
1637                 }
1638                 if (strcmp(long_option[i].name, "guess=") == 0){
1639                     if (p[0] == '0' || p[0] == '1') {
1640                         guess_f = 1;
1641                     } else {
1642                         guess_f = 2;
1643                     }
1644                     continue;
1645                 }
1646 #ifdef OVERWRITE
1647                 if (strcmp(long_option[i].name, "overwrite") == 0){
1648                     file_out_f = TRUE;
1649                     overwrite_f = TRUE;
1650                     preserve_time_f = TRUE;
1651                     continue;
1652                 }
1653                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1654                     file_out_f = TRUE;
1655                     overwrite_f = TRUE;
1656                     preserve_time_f = TRUE;
1657                     backup_f = TRUE;
1658                     backup_suffix = malloc(strlen((char *) p) + 1);
1659                     strcpy(backup_suffix, (char *) p);
1660                     continue;
1661                 }
1662                 if (strcmp(long_option[i].name, "in-place") == 0){
1663                     file_out_f = TRUE;
1664                     overwrite_f = TRUE;
1665                     preserve_time_f = FALSE;
1666                     continue;
1667                 }
1668                 if (strcmp(long_option[i].name, "in-place=") == 0){
1669                     file_out_f = TRUE;
1670                     overwrite_f = TRUE;
1671                     preserve_time_f = FALSE;
1672                     backup_f = TRUE;
1673                     backup_suffix = malloc(strlen((char *) p) + 1);
1674                     strcpy(backup_suffix, (char *) p);
1675                     continue;
1676                 }
1677 #endif
1678 #ifdef INPUT_OPTION
1679                 if (strcmp(long_option[i].name, "cap-input") == 0){
1680                     cap_f = TRUE;
1681                     continue;
1682                 }
1683                 if (strcmp(long_option[i].name, "url-input") == 0){
1684                     url_f = TRUE;
1685                     continue;
1686                 }
1687 #endif
1688 #ifdef NUMCHAR_OPTION
1689                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1690                     numchar_f = TRUE;
1691                     continue;
1692                 }
1693 #endif
1694 #ifdef CHECK_OPTION
1695                 if (strcmp(long_option[i].name, "no-output") == 0){
1696                     noout_f = TRUE;
1697                     continue;
1698                 }
1699                 if (strcmp(long_option[i].name, "debug") == 0){
1700                     debug_f = TRUE;
1701                     continue;
1702                 }
1703 #endif
1704                 if (strcmp(long_option[i].name, "cp932") == 0){
1705 #ifdef SHIFTJIS_CP932
1706                     cp51932_f = TRUE;
1707                     cp932inv_f = TRUE;
1708 #endif
1709 #ifdef UTF8_OUTPUT_ENABLE
1710                     ms_ucs_map_f = UCS_MAP_CP932;
1711 #endif
1712                     continue;
1713                 }
1714                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1715 #ifdef SHIFTJIS_CP932
1716                     cp51932_f = FALSE;
1717                     cp932inv_f = FALSE;
1718 #endif
1719 #ifdef UTF8_OUTPUT_ENABLE
1720                     ms_ucs_map_f = UCS_MAP_ASCII;
1721 #endif
1722                     continue;
1723                 }
1724 #ifdef SHIFTJIS_CP932
1725                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1726                     cp932inv_f = TRUE;
1727                     continue;
1728                 }
1729 #endif
1730
1731 #ifdef X0212_ENABLE
1732                 if (strcmp(long_option[i].name, "x0212") == 0){
1733                     x0212_f = TRUE;
1734                     continue;
1735                 }
1736 #endif
1737
1738 #ifdef EXEC_IO
1739                   if (strcmp(long_option[i].name, "exec-in") == 0){
1740                       exec_f = 1;
1741                       return;
1742                   }
1743                   if (strcmp(long_option[i].name, "exec-out") == 0){
1744                       exec_f = -1;
1745                       return;
1746                   }
1747 #endif
1748 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1749                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1750                     no_cp932ext_f = TRUE;
1751                     continue;
1752                 }
1753                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1754                     no_best_fit_chars_f = TRUE;
1755                     continue;
1756                 }
1757                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1758                     encode_fallback = NULL;
1759                     continue;
1760                 }
1761                 if (strcmp(long_option[i].name, "fb-html") == 0){
1762                     encode_fallback = encode_fallback_html;
1763                     continue;
1764                 }
1765                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1766                     encode_fallback = encode_fallback_xml;
1767                     continue;
1768                 }
1769                 if (strcmp(long_option[i].name, "fb-java") == 0){
1770                     encode_fallback = encode_fallback_java;
1771                     continue;
1772                 }
1773                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1774                     encode_fallback = encode_fallback_perl;
1775                     continue;
1776                 }
1777                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1778                     encode_fallback = encode_fallback_subchar;
1779                     continue;
1780                 }
1781                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1782                     encode_fallback = encode_fallback_subchar;
1783                     unicode_subchar = 0;
1784                     if (p[0] != '0'){
1785                         /* decimal number */
1786                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1787                             unicode_subchar *= 10;
1788                             unicode_subchar += hex2bin(p[i]);
1789                         }
1790                     }else if(p[1] == 'x' || p[1] == 'X'){
1791                         /* hexadecimal number */
1792                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1793                             unicode_subchar <<= 4;
1794                             unicode_subchar |= hex2bin(p[i]);
1795                         }
1796                     }else{
1797                         /* octal number */
1798                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1799                             unicode_subchar *= 8;
1800                             unicode_subchar += hex2bin(p[i]);
1801                         }
1802                     }
1803                     w16e_conv(unicode_subchar, &i, &j);
1804                     unicode_subchar = i<<8 | j;
1805                     continue;
1806                 }
1807 #endif
1808 #ifdef UTF8_OUTPUT_ENABLE
1809                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1810                     ms_ucs_map_f = UCS_MAP_MS;
1811                     continue;
1812                 }
1813 #endif
1814 #ifdef UNICODE_NORMALIZATION
1815                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1816                     nfc_f = TRUE;
1817                     continue;
1818                 }
1819 #endif
1820                 if (strcmp(long_option[i].name, "prefix=") == 0){
1821                     if (nkf_isgraph(p[0])){
1822                         for (i = 1; nkf_isgraph(p[i]); i++){
1823                             prefix_table[p[i]] = p[0];
1824                         }
1825                     }
1826                     continue;
1827                 }
1828             }
1829             continue;
1830         case 'b':           /* buffered mode */
1831             unbuf_f = FALSE;
1832             continue;
1833         case 'u':           /* non bufferd mode */
1834             unbuf_f = TRUE;
1835             continue;
1836         case 't':           /* transparent mode */
1837             if (*cp=='1') {
1838                 /* alias of -t */
1839                 cp++;
1840                 nop_f = TRUE;
1841             } else if (*cp=='2') {
1842                 /*
1843                  * -t with put/get
1844                  *
1845                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1846                  *
1847                  */
1848                 cp++;
1849                 nop_f = 2;
1850             } else
1851                 nop_f = TRUE;
1852             continue;
1853         case 'j':           /* JIS output */
1854         case 'n':
1855             output_encoding = nkf_enc_from_index(ISO_2022_JP);
1856             continue;
1857         case 'e':           /* AT&T EUC output */
1858             cp932inv_f = FALSE;
1859             output_encoding = nkf_enc_from_index(EUC_JP);
1860             continue;
1861         case 's':           /* SJIS output */
1862             output_encoding = nkf_enc_from_index(WINDOWS_31J);
1863             continue;
1864         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1865             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1866             input_encoding = nkf_enc_from_index(ISO_8859_1);
1867             continue;
1868         case 'i':           /* Kanji IN ESC-$-@/B */
1869             if (*cp=='@'||*cp=='B')
1870                 kanji_intro = *cp++;
1871             continue;
1872         case 'o':           /* ASCII IN ESC-(-J/B */
1873             if (*cp=='J'||*cp=='B'||*cp=='H')
1874                 ascii_intro = *cp++;
1875             continue;
1876         case 'h':
1877             /*
1878                 bit:1   katakana->hiragana
1879                 bit:2   hiragana->katakana
1880             */
1881             if ('9'>= *cp && *cp>='0')
1882                 hira_f |= (*cp++ -'0');
1883             else
1884                 hira_f |= 1;
1885             continue;
1886         case 'r':
1887             rot_f = TRUE;
1888             continue;
1889 #if defined(MSDOS) || defined(__OS2__)
1890         case 'T':
1891             binmode_f = FALSE;
1892             continue;
1893 #endif
1894 #ifndef PERL_XS
1895         case 'V':
1896             show_configuration();
1897             exit(1);
1898             break;
1899         case 'v':
1900             usage();
1901             exit(1);
1902             break;
1903 #endif
1904 #ifdef UTF8_OUTPUT_ENABLE
1905         case 'w':           /* UTF-8 output */
1906             if (cp[0] == '8') {
1907                 cp++;
1908                 if (cp[0] == '0'){
1909                     cp++;
1910                     output_encoding = nkf_enc_from_index(UTF_8N);
1911                 } else {
1912                     output_bom_f = TRUE;
1913                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
1914                 }
1915             } else {
1916                 int enc_idx;
1917                 if ('1'== cp[0] && '6'==cp[1]) {
1918                     cp += 2;
1919                     enc_idx = UTF_16;
1920                 } else if ('3'== cp[0] && '2'==cp[1]) {
1921                     cp += 2;
1922                     enc_idx = UTF_32;
1923                 } else {
1924                     output_encoding = nkf_enc_from_index(UTF_8);
1925                     continue;
1926                 }
1927                 if (cp[0]=='L') {
1928                     cp++;
1929                     output_endian = ENDIAN_LITTLE;
1930                 } else if (cp[0] == 'B') {
1931                     cp++;
1932                 } else {
1933                     output_encoding = nkf_enc_from_index(enc_idx);
1934                     continue;
1935                 }
1936                 if (cp[0] == '0'){
1937                     cp++;
1938                     enc_idx = enc_idx == UTF_16
1939                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1940                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1941                 } else {
1942                     output_bom_f = TRUE;
1943                     enc_idx = enc_idx == UTF_16
1944                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1945                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1946                 }
1947                 output_encoding = nkf_enc_from_index(enc_idx);
1948             }
1949             continue;
1950 #endif
1951 #ifdef UTF8_INPUT_ENABLE
1952         case 'W':           /* UTF input */
1953             if (cp[0] == '8') {
1954                 cp++;
1955                 input_encoding = nkf_enc_from_index(UTF_8);
1956             }else{
1957                 int enc_idx;
1958                 if ('1'== cp[0] && '6'==cp[1]) {
1959                     cp += 2;
1960                     input_endian = ENDIAN_BIG;
1961                     enc_idx = UTF_16;
1962                 } else if ('3'== cp[0] && '2'==cp[1]) {
1963                     cp += 2;
1964                     input_endian = ENDIAN_BIG;
1965                     enc_idx = UTF_32;
1966                 } else {
1967                     input_encoding = nkf_enc_from_index(UTF_8);
1968                     continue;
1969                 }
1970                 if (cp[0]=='L') {
1971                     cp++;
1972                     input_endian = ENDIAN_LITTLE;
1973                 } else if (cp[0] == 'B') {
1974                     cp++;
1975                     input_endian = ENDIAN_BIG;
1976                 }
1977                 enc_idx = enc_idx == UTF_16
1978                     ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1979                     : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1980                 input_encoding = nkf_enc_from_index(enc_idx);
1981             }
1982             continue;
1983 #endif
1984         /* Input code assumption */
1985         case 'J':   /* ISO-2022-JP input */
1986             input_encoding = nkf_enc_from_index(ISO_2022_JP);
1987             continue;
1988         case 'E':   /* EUC-JP input */
1989             input_encoding = nkf_enc_from_index(EUC_JP);
1990             continue;
1991         case 'S':   /* Windows-31J input */
1992             input_encoding = nkf_enc_from_index(WINDOWS_31J);
1993             continue;
1994         case 'Z':   /* Convert X0208 alphabet to asii */
1995             /* alpha_f
1996                bit:0   Convert JIS X 0208 Alphabet to ASCII
1997                bit:1   Convert Kankaku to one space
1998                bit:2   Convert Kankaku to two spaces
1999                bit:3   Convert HTML Entity
2000                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2001             */
2002             while ('0'<= *cp && *cp <='9') {
2003                 alpha_f |= 1 << (*cp++ - '0');
2004             }
2005             if (!alpha_f) alpha_f = 1;
2006             continue;
2007         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2008             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2009             /* accept  X0201
2010                     ESC-(-I     in JIS, EUC, MS Kanji
2011                     SI/SO       in JIS, EUC, MS Kanji
2012                     SSO         in EUC, JIS, not in MS Kanji
2013                     MS Kanji (0xa0-0xdf)
2014                output  X0201
2015                     ESC-(-I     in JIS (0x20-0x5f)
2016                     SSO         in EUC (0xa0-0xdf)
2017                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2018             */
2019             continue;
2020         case 'X':   /* Convert X0201 kana to X0208 */
2021             x0201_f = TRUE;
2022             continue;
2023         case 'F':   /* prserve new lines */
2024             fold_preserve_f = TRUE;
2025         case 'f':   /* folding -f60 or -f */
2026             fold_f = TRUE;
2027             fold_len = 0;
2028             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2029                 fold_len *= 10;
2030                 fold_len += *cp++ - '0';
2031             }
2032             if (!(0<fold_len && fold_len<BUFSIZ))
2033                 fold_len = DEFAULT_FOLD;
2034             if (*cp=='-') {
2035                 fold_margin = 0;
2036                 cp++;
2037                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2038                     fold_margin *= 10;
2039                     fold_margin += *cp++ - '0';
2040                 }
2041             }
2042             continue;
2043         case 'm':   /* MIME support */
2044             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2045             if (*cp=='B'||*cp=='Q') {
2046                 mime_decode_mode = *cp++;
2047                 mimebuf_f = FIXED_MIME;
2048             } else if (*cp=='N') {
2049                 mime_f = TRUE; cp++;
2050             } else if (*cp=='S') {
2051                 mime_f = STRICT_MIME; cp++;
2052             } else if (*cp=='0') {
2053                 mime_decode_f = FALSE;
2054                 mime_f = FALSE; cp++;
2055             }
2056             continue;
2057         case 'M':   /* MIME output */
2058             if (*cp=='B') {
2059                 mimeout_mode = 'B';
2060                 mimeout_f = FIXED_MIME; cp++;
2061             } else if (*cp=='Q') {
2062                 mimeout_mode = 'Q';
2063                 mimeout_f = FIXED_MIME; cp++;
2064             } else {
2065                 mimeout_f = TRUE;
2066             }
2067             continue;
2068         case 'B':   /* Broken JIS support */
2069             /*  bit:0   no ESC JIS
2070                 bit:1   allow any x on ESC-(-x or ESC-$-x
2071                 bit:2   reset to ascii on NL
2072             */
2073             if ('9'>= *cp && *cp>='0')
2074                 broken_f |= 1<<(*cp++ -'0');
2075             else
2076                 broken_f |= TRUE;
2077             continue;
2078 #ifndef PERL_XS
2079         case 'O':/* for Output file */
2080             file_out_f = TRUE;
2081             continue;
2082 #endif
2083         case 'c':/* add cr code */
2084             nlmode_f = CRLF;
2085             continue;
2086         case 'd':/* delete cr code */
2087             nlmode_f = LF;
2088             continue;
2089         case 'I':   /* ISO-2022-JP output */
2090             iso2022jp_f = TRUE;
2091             continue;
2092         case 'L':  /* line mode */
2093             if (*cp=='u') {         /* unix */
2094                 nlmode_f = LF; cp++;
2095             } else if (*cp=='m') { /* mac */
2096                 nlmode_f = CR; cp++;
2097             } else if (*cp=='w') { /* windows */
2098                 nlmode_f = CRLF; cp++;
2099             } else if (*cp=='0') { /* no conversion  */
2100                 nlmode_f = 0; cp++;
2101             }
2102             continue;
2103 #ifndef PERL_XS
2104         case 'g':
2105             if ('2' <= *cp && *cp <= '9') {
2106                 guess_f = 2;
2107                 cp++;
2108             } else if (*cp == '0' || *cp == '1') {
2109                 guess_f = 1;
2110                 cp++;
2111             } else {
2112                 guess_f = 1;
2113             }
2114             continue;
2115 #endif
2116         case SP:
2117         /* module muliple options in a string are allowed for Perl moudle  */
2118             while(*cp && *cp++!='-');
2119             continue;
2120         default:
2121             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2122             /* bogus option but ignored */
2123             continue;
2124         }
2125     }
2126 }
2127
2128 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2129 {
2130     if (iconv_func){
2131         struct input_code *p = input_code_list;
2132         while (p->name){
2133             if (iconv_func == p->iconv_func){
2134                 return p;
2135             }
2136             p++;
2137         }
2138     }
2139     return 0;
2140 }
2141
2142 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2143 {
2144 #ifdef INPUT_CODE_FIX
2145     if (f || !input_encoding)
2146 #endif
2147         if (estab_f != f){
2148             estab_f = f;
2149         }
2150
2151     if (iconv_func
2152 #ifdef INPUT_CODE_FIX
2153         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
2154 #endif
2155         ){
2156         iconv = iconv_func;
2157     }
2158 #ifdef CHECK_OPTION
2159     if (estab_f && iconv_for_check != iconv){
2160         struct input_code *p = find_inputcode_byfunc(iconv);
2161         if (p){
2162             set_input_codename(p->name);
2163             debug(p->name);
2164         }
2165         iconv_for_check = iconv;
2166     }
2167 #endif
2168 }
2169
/*
 * Bit flags that set_code_score() ORs into input_code.score.
 * Each flag occupies one bit; SCORE_ERROR is the highest (bit 7, value 128).
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* substitution via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* nonexistent character */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* score assigned by status_reset() */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score tables indexed by the low nibble of the lead byte in code_score().
 *
 * The element type is unsigned char, not plain char: SCORE_ERROR is 128,
 * which does not fit in a signed char.  On platforms where plain char is
 * signed the entry would be stored as a negative value and, once widened
 * and OR-ed into the score, would set every higher bit as well.
 */

/* used when (lead & 0x70) == 0x20 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/* used when (lead & 0x70) == 0x70 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2194
2195 void set_code_score(struct input_code *ptr, nkf_char score)
2196 {
2197     if (ptr){
2198         ptr->score |= score;
2199     }
2200 }
2201
2202 void clr_code_score(struct input_code *ptr, nkf_char score)
2203 {
2204     if (ptr){
2205         ptr->score &= ~score;
2206     }
2207 }
2208
2209 void code_score(struct input_code *ptr)
2210 {
2211     nkf_char c2 = ptr->buf[0];
2212 #ifdef UTF8_OUTPUT_ENABLE
2213     nkf_char c1 = ptr->buf[1];
2214 #endif
2215     if (c2 < 0){
2216         set_code_score(ptr, SCORE_ERROR);
2217     }else if (c2 == SSO){
2218         set_code_score(ptr, SCORE_KANA);
2219     }else if (c2 == 0x8f){
2220         set_code_score(ptr, SCORE_X0212);
2221 #ifdef UTF8_OUTPUT_ENABLE
2222     }else if (!e2w_conv(c2, c1)){
2223         set_code_score(ptr, SCORE_NO_EXIST);
2224 #endif
2225     }else if ((c2 & 0x70) == 0x20){
2226         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2227     }else if ((c2 & 0x70) == 0x70){
2228         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2229     }else if ((c2 & 0x70) >= 0x50){
2230         set_code_score(ptr, SCORE_L2);
2231     }
2232 }
2233
2234 void status_disable(struct input_code *ptr)
2235 {
2236     ptr->stat = -1;
2237     ptr->buf[0] = -1;
2238     code_score(ptr);
2239     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2240 }
2241
2242 void status_push_ch(struct input_code *ptr, nkf_char c)
2243 {
2244     ptr->buf[ptr->index++] = c;
2245 }
2246
2247 void status_clear(struct input_code *ptr)
2248 {
2249     ptr->stat = 0;
2250     ptr->index = 0;
2251 }
2252
2253 void status_reset(struct input_code *ptr)
2254 {
2255     status_clear(ptr);
2256     ptr->score = SCORE_INIT;
2257 }
2258
2259 void status_reinit(struct input_code *ptr)
2260 {
2261     status_reset(ptr);
2262     ptr->_file_stat = 0;
2263 }
2264
2265 void status_check(struct input_code *ptr, nkf_char c)
2266 {
2267     if (c <= DEL && estab_f){
2268         status_reset(ptr);
2269     }
2270 }
2271
2272 void s_status(struct input_code *ptr, nkf_char c)
2273 {
2274     switch(ptr->stat){
2275       case -1:
2276           status_check(ptr, c);
2277           break;
2278       case 0:
2279           if (c <= DEL){
2280               break;
2281 #ifdef NUMCHAR_OPTION
2282           }else if (is_unicode_capsule(c)){
2283               break;
2284 #endif
2285           }else if (0xa1 <= c && c <= 0xdf){
2286               status_push_ch(ptr, SSO);
2287               status_push_ch(ptr, c);
2288               code_score(ptr);
2289               status_clear(ptr);
2290           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2291               ptr->stat = 1;
2292               status_push_ch(ptr, c);
2293           }else if (0xed <= c && c <= 0xee){
2294               ptr->stat = 3;
2295               status_push_ch(ptr, c);
2296 #ifdef SHIFTJIS_CP932
2297           }else if (is_ibmext_in_sjis(c)){
2298               ptr->stat = 2;
2299               status_push_ch(ptr, c);
2300 #endif /* SHIFTJIS_CP932 */
2301 #ifdef X0212_ENABLE
2302           }else if (0xf0 <= c && c <= 0xfc){
2303               ptr->stat = 1;
2304               status_push_ch(ptr, c);
2305 #endif /* X0212_ENABLE */
2306           }else{
2307               status_disable(ptr);
2308           }
2309           break;
2310       case 1:
2311           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2312               status_push_ch(ptr, c);
2313               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2314               code_score(ptr);
2315               status_clear(ptr);
2316           }else{
2317               status_disable(ptr);
2318           }
2319           break;
2320       case 2:
2321 #ifdef SHIFTJIS_CP932
2322         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2323             status_push_ch(ptr, c);
2324             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2325                 set_code_score(ptr, SCORE_CP932);
2326                 status_clear(ptr);
2327                 break;
2328             }
2329         }
2330 #endif /* SHIFTJIS_CP932 */
2331         status_disable(ptr);
2332           break;
2333       case 3:
2334           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2335               status_push_ch(ptr, c);
2336               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2337             set_code_score(ptr, SCORE_CP932);
2338             status_clear(ptr);
2339           }else{
2340               status_disable(ptr);
2341           }
2342           break;
2343     }
2344 }
2345
2346 void e_status(struct input_code *ptr, nkf_char c)
2347 {
2348     switch (ptr->stat){
2349       case -1:
2350           status_check(ptr, c);
2351           break;
2352       case 0:
2353           if (c <= DEL){
2354               break;
2355 #ifdef NUMCHAR_OPTION
2356           }else if (is_unicode_capsule(c)){
2357               break;
2358 #endif
2359           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2360               ptr->stat = 1;
2361               status_push_ch(ptr, c);
2362 #ifdef X0212_ENABLE
2363           }else if (0x8f == c){
2364               ptr->stat = 2;
2365               status_push_ch(ptr, c);
2366 #endif /* X0212_ENABLE */
2367           }else{
2368               status_disable(ptr);
2369           }
2370           break;
2371       case 1:
2372           if (0xa1 <= c && c <= 0xfe){
2373               status_push_ch(ptr, c);
2374               code_score(ptr);
2375               status_clear(ptr);
2376           }else{
2377               status_disable(ptr);
2378           }
2379           break;
2380 #ifdef X0212_ENABLE
2381       case 2:
2382           if (0xa1 <= c && c <= 0xfe){
2383               ptr->stat = 1;
2384               status_push_ch(ptr, c);
2385           }else{
2386               status_disable(ptr);
2387           }
2388 #endif /* X0212_ENABLE */
2389     }
2390 }
2391
#ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte c to the UTF-8 candidate's state machine.
 *
 * ptr->stat values:
 *   -1  disabled; only status_check() is consulted
 *    0  idle, waiting for a lead byte
 *    1  lead byte 0xc0..0xdf buffered (one continuation byte expected)
 *    2  lead byte 0xe0..0xef buffered (two continuation bytes expected)
 *    3  lead byte 0xf0..0xf4 buffered; continuation bytes are consumed and
 *       the state is cleared without scoring
 * In states 1 and 2, once more bytes than stat are buffered the sequence
 * is converted with w2e_conv() and scored by code_score(), except that the
 * byte sequence EF BB BF (BOM) is not scored.
 * Any byte that does not fit the current state disables the candidate.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    /* UTF-8 continuation byte range */
    const int is_cont = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
    case -1:
        status_check(ptr, c);
        break;

    case 0:
        if (c <= DEL)
            break;
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c))
            break;
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
        }else{
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        break;

    case 1:
    case 2:
        if (!is_cont){
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index > ptr->stat){
            /* must be tested before w2e_conv overwrites buf[0]/buf[1] */
            int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                          && ptr->buf[2] == 0xbf);
            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!is_bom)
                code_score(ptr);
            status_clear(ptr);
        }
        break;

    case 3:
        if (!is_cont){
            status_disable(ptr);
            break;
        }
        if (ptr->index < ptr->stat)
            status_push_ch(ptr, c);
        else
            status_clear(ptr);
        break;
    }
}
#endif
2451
2452 void code_status(nkf_char c)
2453 {
2454     int action_flag = 1;
2455     struct input_code *result = 0;
2456     struct input_code *p = input_code_list;
2457     while (p->name){
2458         if (!p->status_func) {
2459             ++p;
2460             continue;
2461         }
2462         if (!p->status_func)
2463             continue;
2464         (p->status_func)(p, c);
2465         if (p->stat > 0){
2466             action_flag = 0;
2467         }else if(p->stat == 0){
2468             if (result){
2469                 action_flag = 0;
2470             }else{
2471                 result = p;
2472             }
2473         }
2474         ++p;
2475     }
2476
2477     if (action_flag){
2478         if (result && !estab_f){
2479             set_iconv(TRUE, result->iconv_func);
2480         }else if (c <= DEL){
2481             struct input_code *ptr = input_code_list;
2482             while (ptr->name){
2483                 status_reset(ptr);
2484                 ++ptr;
2485             }
2486         }
2487     }
2488 }
2489
2490 #ifndef WIN32DLL
2491 nkf_char std_getc(FILE *f)
2492 {
2493     if (std_gc_ndx){
2494         return std_gc_buf[--std_gc_ndx];
2495     }
2496     return getc(f);
2497 }
2498 #endif /*WIN32DLL*/
2499
2500 nkf_char std_ungetc(nkf_char c, FILE *f)
2501 {
2502     if (std_gc_ndx == STD_GC_BUFSIZE){
2503         return EOF;
2504     }
2505     std_gc_buf[std_gc_ndx++] = c;
2506     return c;
2507 }
2508
2509 #ifndef WIN32DLL
2510 void std_putc(nkf_char c)
2511 {
2512     if(c!=EOF)
2513       putchar(c);
2514 }
2515 #endif /*WIN32DLL*/
2516
2517 #if !defined(PERL_XS) && !defined(WIN32DLL)
2518 nkf_char noconvert(FILE *f)
2519 {
2520     nkf_char    c;
2521
2522     if (nop_f == 2)
2523         module_connection();
2524     while ((c = (*i_getc)(f)) != EOF)
2525       (*o_putc)(c);
2526     (*o_putc)(EOF);
2527     return 1;
2528 }
2529 #endif
2530
2531 void module_connection(void)
2532 {
2533     if (!output_encoding) output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
2534     oconv = nkf_enc_to_oconv(output_encoding);
2535     o_putc = std_putc;
2536
2537     /* replace continucation module, from output side */
2538
2539     /* output redicrection */
2540 #ifdef CHECK_OPTION
2541     if (noout_f || guess_f){
2542         o_putc = no_putc;
2543     }
2544 #endif
2545     if (mimeout_f) {
2546         o_mputc = o_putc;
2547         o_putc = mime_putc;
2548         if (mimeout_f == TRUE) {
2549             o_base64conv = oconv; oconv = base64_conv;
2550         }
2551         /* base64_count = 0; */
2552     }
2553
2554     if (nlmode_f || guess_f) {
2555         o_nlconv = oconv; oconv = nl_conv;
2556     }
2557     if (rot_f) {
2558         o_rot_conv = oconv; oconv = rot_conv;
2559     }
2560     if (iso2022jp_f) {
2561         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2562     }
2563     if (hira_f) {
2564         o_hira_conv = oconv; oconv = hira_conv;
2565     }
2566     if (fold_f) {
2567         o_fconv = oconv; oconv = fold_conv;
2568         f_line = 0;
2569     }
2570     if (alpha_f || x0201_f) {
2571         o_zconv = oconv; oconv = z_conv;
2572     }
2573
2574     i_getc = std_getc;
2575     i_ungetc = std_ungetc;
2576     /* input redicrection */
2577 #ifdef INPUT_OPTION
2578     if (cap_f){
2579         i_cgetc = i_getc; i_getc = cap_getc;
2580         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2581     }
2582     if (url_f){
2583         i_ugetc = i_getc; i_getc = url_getc;
2584         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2585     }
2586 #endif
2587 #ifdef NUMCHAR_OPTION
2588     if (numchar_f){
2589         i_ngetc = i_getc; i_getc = numchar_getc;
2590         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2591     }
2592 #endif
2593 #ifdef UNICODE_NORMALIZATION
2594     if (nfc_f){
2595         i_nfc_getc = i_getc; i_getc = nfc_getc;
2596         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2597     }
2598 #endif
2599     if (mime_f && mimebuf_f==FIXED_MIME) {
2600         i_mgetc = i_getc; i_getc = mime_getc;
2601         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2602     }
2603     if (broken_f & 1) {
2604         i_bgetc = i_getc; i_getc = broken_getc;
2605         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2606     }
2607     if (input_encoding) {
2608         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
2609     } else {
2610         set_iconv(FALSE, e_iconv);
2611     }
2612
2613     {
2614         struct input_code *p = input_code_list;
2615         while (p->name){
2616             status_reinit(p++);
2617         }
2618     }
2619 }
2620
2621 /*
2622  * Check and Ignore BOM
2623  */
2624 void check_bom(FILE *f)
2625 {
2626     int c2;
2627     switch(c2 = (*i_getc)(f)){
2628     case 0x00:
2629         if((c2 = (*i_getc)(f)) == 0x00){
2630             if((c2 = (*i_getc)(f)) == 0xFE){
2631                 if((c2 = (*i_getc)(f)) == 0xFF){
2632                     if(!input_encoding){
2633                         set_iconv(TRUE, w_iconv32);
2634                     }
2635                     if (iconv == w_iconv32) {
2636                         input_endian = ENDIAN_BIG;
2637                         return;
2638                     }
2639                     (*i_ungetc)(0xFF,f);
2640                 }else (*i_ungetc)(c2,f);
2641                 (*i_ungetc)(0xFE,f);
2642             }else if(c2 == 0xFF){
2643                 if((c2 = (*i_getc)(f)) == 0xFE){
2644                     if(!input_encoding){
2645                         set_iconv(TRUE, w_iconv32);
2646                     }
2647                     if (iconv == w_iconv32) {
2648                         input_endian = ENDIAN_2143;
2649                         return;
2650                     }
2651                     (*i_ungetc)(0xFF,f);
2652                 }else (*i_ungetc)(c2,f);
2653                 (*i_ungetc)(0xFF,f);
2654             }else (*i_ungetc)(c2,f);
2655             (*i_ungetc)(0x00,f);
2656         }else (*i_ungetc)(c2,f);
2657         (*i_ungetc)(0x00,f);
2658         break;
2659     case 0xEF:
2660         if((c2 = (*i_getc)(f)) == 0xBB){
2661             if((c2 = (*i_getc)(f)) == 0xBF){
2662                 if(!input_encoding){
2663                     set_iconv(TRUE, w_iconv);
2664                 }
2665                 if (iconv == w_iconv) {
2666                     return;
2667                 }
2668                 (*i_ungetc)(0xBF,f);
2669             }else (*i_ungetc)(c2,f);
2670             (*i_ungetc)(0xBB,f);
2671         }else (*i_ungetc)(c2,f);
2672         (*i_ungetc)(0xEF,f);
2673         break;
2674     case 0xFE:
2675         if((c2 = (*i_getc)(f)) == 0xFF){
2676             if((c2 = (*i_getc)(f)) == 0x00){
2677                 if((c2 = (*i_getc)(f)) == 0x00){
2678                     if(!input_encoding){
2679                         set_iconv(TRUE, w_iconv32);
2680                     }
2681                     if (iconv == w_iconv32) {
2682                         input_endian = ENDIAN_3412;
2683                         return;
2684                     }
2685                     (*i_ungetc)(0x00,f);
2686                 }else (*i_ungetc)(c2,f);
2687                 (*i_ungetc)(0x00,f);
2688             }else (*i_ungetc)(c2,f);
2689             if(!input_encoding){
2690                 set_iconv(TRUE, w_iconv16);
2691             }
2692             if (iconv == w_iconv16) {
2693                 input_endian = ENDIAN_BIG;
2694                 return;
2695             }
2696             (*i_ungetc)(0xFF,f);
2697         }else (*i_ungetc)(c2,f);
2698         (*i_ungetc)(0xFE,f);
2699         break;
2700     case 0xFF:
2701         if((c2 = (*i_getc)(f)) == 0xFE){
2702             if((c2 = (*i_getc)(f)) == 0x00){
2703                 if((c2 = (*i_getc)(f)) == 0x00){
2704                     if(!input_encoding){
2705                         set_iconv(TRUE, w_iconv32);
2706                     }
2707                     if (iconv == w_iconv32) {
2708                         input_endian = ENDIAN_LITTLE;
2709                         return;
2710                     }
2711                     (*i_ungetc)(0x00,f);
2712                 }else (*i_ungetc)(c2,f);
2713                 (*i_ungetc)(0x00,f);
2714             }else (*i_ungetc)(c2,f);
2715             if(!input_encoding){
2716                 set_iconv(TRUE, w_iconv16);
2717             }
2718             if (iconv == w_iconv16) {
2719                 input_endian = ENDIAN_LITTLE;
2720                 return;
2721             }
2722             (*i_ungetc)(0xFE,f);
2723         }else (*i_ungetc)(c2,f);
2724         (*i_ungetc)(0xFF,f);
2725         break;
2726     default:
2727         (*i_ungetc)(c2,f);
2728         break;
2729     }
2730 }
2731
2732 /*
2733    Conversion main loop. Code detection only.

   Reads the input f through i_getc until EOF.  Each pass of the loop
   either remembers a pending first byte in c2, changes input_mode /
   shift_mode in response to SI, SO or an ESC sequence, or falls through
   to the "send" switch at the bottom of the loop, which hands (c2, c1)
   (and c0 where used) to iconv or oconv depending on input_mode.

   module_connection() and check_bom() are called once before the loop.
   After the loop iconv is called with EOF and, when no input code name
   has been recorded and is_8bit is set, the input_code_list entry with
   the lowest score is recorded as the input code name.

   Returns 1 after the epilogue.
2734  */
2735
2736 nkf_char kanji_convert(FILE *f)
2737 {
2738     nkf_char    c3, c2=0, c1, c0=0;
2739     int is_8bit = FALSE;
2740
    /* is_8bit is set when an input encoding was given and it is not
       ASCII compatible; it switches off SI / SO / ESC handling below
       unless mime_decode_mode is on. */
2741     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
2742         is_8bit = TRUE;
2743     }
2744
2745     input_mode = ASCII;
2746     output_mode = ASCII;
2747     shift_mode = FALSE;
2748
    /* NEXT skips the send switch, SEND is an empty statement so control
       reaches the send switch after the if/else chain, LAST leaves the
       read loop. */
2749 #define NEXT continue      /* no output, get next */
2750 #define SEND ;             /* output c1 and c2, get next */
2751 #define LAST break         /* end of loop, go closing  */
2752
2753     module_connection();
2754     check_bom(f);
2755
2756     while ((c1 = (*i_getc)(f)) != EOF) {
2757 #ifdef INPUT_CODE_FIX
2758         if (!input_encoding)
2759 #endif
2760             code_status(c1);
2761         if (c2) {
2762             /* second byte */
2763             if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2764                 /* in case of 8th bit is on */
2765                 if (!estab_f&&!mime_decode_mode) {
2766                     /* in case of not established yet */
2767                     /* It is still ambiguous */
2768                     if (h_conv(f, c2, c1)==EOF)
2769                         LAST;
2770                     else
2771                         c2 = 0;
2772                     NEXT;
2773                 } else {
2774                     /* in case of already established */
2775                     if (c1 < AT) {
2776                         /* ignore bogus code and not CP5022x UCD */
2777                         c2 = 0;
2778                         NEXT;
2779                     } else {
2780                         SEND;
2781                     }
2782                 }
2783             } else
2784                 /* second byte, 7 bit code */
2785                 /* it might be kanji shifted */
2786                 if ((c1 == DEL) || (c1 <= SP)) {
2787                     /* ignore bogus first code */
2788                     c2 = 0;
2789                     NEXT;
2790                 } else
2791                     SEND;
2792         } else {
2793             /* first byte */
2794 #ifdef UTF8_INPUT_ENABLE
2795             if (iconv == w_iconv16) {
                /* UTF-16: read the second byte of the unit; for a lead
                   unit whose high byte is 0xD8..0xDB also read the next
                   two bytes into c0.  c2 becomes EOF on short input. */
2796                 if (input_endian == ENDIAN_BIG) {
2797                     c2 = c1;
2798                     if ((c1 = (*i_getc)(f)) != EOF) {
2799                         if (0xD8 <= c2 && c2 <= 0xDB) {
2800                             if ((c0 = (*i_getc)(f)) != EOF) {
2801                                 c0 <<= 8;
2802                                 if ((c3 = (*i_getc)(f)) != EOF) {
2803                                     c0 |= c3;
2804                                 } else c2 = EOF;
2805                             } else c2 = EOF;
2806                         }
2807                     } else c2 = EOF;
2808                 } else {
2809                     if ((c2 = (*i_getc)(f)) != EOF) {
2810                         if (0xD8 <= c2 && c2 <= 0xDB) {
2811                             if ((c3 = (*i_getc)(f)) != EOF) {
2812                                 if ((c0 = (*i_getc)(f)) != EOF) {
2813                                     c0 <<= 8;
2814                                     c0 |= c3;
2815                                 } else c2 = EOF;
2816                             } else c2 = EOF;
2817                         }
2818                     } else c2 = EOF;
2819                 }
2820                 SEND;
2821             } else if(iconv == w_iconv32){
                /* UTF-32: read three more bytes and combine three of the
                   four bytes into c1 according to input_endian.  This
                   local c3 shadows the function-level c3. */
2822                 int c3 = c1;
2823                 if((c2 = (*i_getc)(f)) != EOF &&
2824                    (c1 = (*i_getc)(f)) != EOF &&
2825                    (c0 = (*i_getc)(f)) != EOF){
2826                     switch(input_endian){
2827                     case ENDIAN_BIG:
2828                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2829                         break;
2830                     case ENDIAN_LITTLE:
2831                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2832                         break;
2833                     case ENDIAN_2143:
2834                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2835                         break;
2836                     case ENDIAN_3412:
2837                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2838                         break;
2839                     }
2840                     c2 = 0;
2841                 }else{
2842                     c2 = EOF;
2843                 }
2844                 SEND;
2845             } else
2846 #endif
2847 #ifdef NUMCHAR_OPTION
2848             if (is_unicode_capsule(c1)){
2849                 SEND;
2850             } else
2851 #endif
2852             if (c1 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
2853                 /* 8 bit code */
2854                 if (!estab_f && !iso8859_f) {
2855                     /* not established yet */
2856                     c2 = c1;
2857                     NEXT;
2858                 } else { /* estab_f==TRUE */
2859                     if (iso8859_f) {
2860                         c2 = ISO_8859_1;
2861                         c1 &= 0x7f;
2862                         SEND;
2863                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2864                         /* SJIS X0201 Case... */
2865                         if (iso2022jp_f && !x0201_f) {
2866                             (*oconv)(GETA1, GETA2);
2867                             NEXT;
2868                         } else {
2869                             c2 = JIS_X_0201;
2870                             c1 &= 0x7f;
2871                             SEND;
2872                         }
2873                     } else if (c1==SSO && iconv != s_iconv) {
2874                         /* EUC X0201 Case */
2875                         c1 = (*i_getc)(f);  /* skip SSO */
2876                         code_status(c1);
2877                         if (SSP<=c1 && c1<0xe0) {
2878                             if (iso2022jp_f && !x0201_f) {
2879                                 (*oconv)(GETA1, GETA2);
2880                                 NEXT;
2881                             } else {
2882                                 c2 = JIS_X_0201;
2883                                 c1 &= 0x7f;
2884                                 SEND;
2885                             }
2886                         } else  { /* bogus code, skip SSO and one byte */
2887                             NEXT;
2888                         }
2889                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2890                                (c1 == 0xFD || c1 == 0xFE)) {
2891                         /* CP10001 */
2892                         c2 = JIS_X_0201;
2893                         c1 &= 0x7f;
2894                         SEND;
2895                     } else {
2896                        /* already established */
2897                        c2 = c1;
2898                        NEXT;
2899                     }
2900                 }
2901             } else if ((c1 > SP) && (c1 != DEL)) {
2902                 /* in case of Roman characters */
2903                 if (shift_mode) {
2904                     /* output 1 shifted byte */
2905                     if (iso8859_f) {
2906                         c2 = ISO_8859_1;
2907                         SEND;
2908                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2909                       /* output 1 shifted byte */
2910                         if (iso2022jp_f && !x0201_f) {
2911                             (*oconv)(GETA1, GETA2);
2912                             NEXT;
2913                         } else {
2914                             c2 = JIS_X_0201;
2915                             SEND;
2916                         }
2917                     } else {
2918                         /* look like bogus code */
2919                         NEXT;
2920                     }
2921                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2922                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2923                     /* in case of Kanji shifted */
2924                     c2 = c1;
2925                     NEXT;
2926                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2927                     /* Check MIME code */
2928                     if ((c1 = (*i_getc)(f)) == EOF) {
2929                         (*oconv)(0, '=');
2930                         LAST;
2931                     } else if (c1 == '?') {
2932                         /* =? is mime conversion start sequence */
2933                         if(mime_f == STRICT_MIME) {
2934                             /* check in real detail */
2935                             if (mime_begin_strict(f) == EOF)
2936                                 LAST;
2937                             else
2938                                 NEXT;
2939                         } else if (mime_begin(f) == EOF)
2940                             LAST;
2941                         else
2942                             NEXT;
2943                     } else {
2944                         (*oconv)(0, '=');
2945                         (*i_ungetc)(c1,f);
2946                         NEXT;
2947                     }
2948                 } else {
2949                     /* normal ASCII code */
2950                     SEND;
2951                 }
2952             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2953                 shift_mode = FALSE;
2954                 NEXT;
2955             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2956                 shift_mode = TRUE;
2957                 NEXT;
2958             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2959                 if ((c1 = (*i_getc)(f)) == EOF) {
2960                     /*  (*oconv)(0, ESC); don't send bogus code */
2961                     LAST;
2962                 } else if (c1 == '$') {
2963                     if ((c1 = (*i_getc)(f)) == EOF) {
2964                         /*
2965                         (*oconv)(0, ESC); don't send bogus code
2966                         (*oconv)(0, '$'); */
2967                         LAST;
2968                     } else if (c1 == '@'|| c1 == 'B') {
2969                         /* This is kanji introduction */
2970                         input_mode = JIS_X_0208;
2971                         shift_mode = FALSE;
2972                         set_input_codename("ISO-2022-JP");
2973 #ifdef CHECK_OPTION
2974                         debug("ISO-2022-JP");
2975 #endif
2976                         NEXT;
2977                     } else if (c1 == '(') {
2978                         if ((c1 = (*i_getc)(f)) == EOF) {
2979                             /* don't send bogus code
2980                             (*oconv)(0, ESC);
2981                             (*oconv)(0, '$');
2982                             (*oconv)(0, '(');
2983                                 */
2984                             LAST;
2985                         } else if (c1 == '@'|| c1 == 'B') {
2986                             /* This is kanji introduction */
2987                             input_mode = JIS_X_0208;
2988                             shift_mode = FALSE;
2989                             NEXT;
2990 #ifdef X0212_ENABLE
2991                         } else if (c1 == 'D'){
2992                             input_mode = JIS_X_0212;
2993                             shift_mode = FALSE;
2994                             NEXT;
2995 #endif /* X0212_ENABLE */
2996                         } else if (c1 == 0x4F){
2997                             input_mode = JIS_X_0213_1;
2998                             shift_mode = FALSE;
2999                             NEXT;
3000                         } else if (c1 == 0x50){
3001                             input_mode = JIS_X_0213_2;
3002                             shift_mode = FALSE;
3003                             NEXT;
3004                         } else {
3005                             /* could be some special code */
3006                             (*oconv)(0, ESC);
3007                             (*oconv)(0, '$');
3008                             (*oconv)(0, '(');
3009                             (*oconv)(0, c1);
3010                             NEXT;
3011                         }
3012                     } else if (broken_f&0x2) {
3013                         /* accept any ESC-$-x as broken code ... */
3014                         input_mode = JIS_X_0208;
3015                         shift_mode = FALSE;
3016                         NEXT;
3017                     } else {
3018                         (*oconv)(0, ESC);
3019                         (*oconv)(0, '$');
3020                         (*oconv)(0, c1);
3021                         NEXT;
3022                     }
3023                 } else if (c1 == '(') {
3024                     if ((c1 = (*i_getc)(f)) == EOF) {
3025                         /* don't send bogus code
3026                         (*oconv)(0, ESC);
3027                         (*oconv)(0, '('); */
3028                         LAST;
3029                     } else {
3030                         if (c1 == 'I') {
3031                             /* This is X0201 kana introduction */
3032                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3033                             NEXT;
3034                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3035                             /* ESC ( B / J / H: back to ASCII input mode */
3036                             input_mode = ASCII; shift_mode = FALSE;
3037                             NEXT;
3038                         } else if (broken_f&0x2) {
3039                             input_mode = ASCII; shift_mode = FALSE;
3040                             NEXT;
3041                         } else {
3042                             (*oconv)(0, ESC);
3043                             (*oconv)(0, '(');
3044                             /* maintain various input_mode here */
3045                             SEND;
3046                         }
3047                     }
3048                } else if ( c1 == 'N' || c1 == 'n'){
3049                    /* SS2 */
3050                    c3 = (*i_getc)(f);  /* skip SS2 */
3051                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3052                        c1 = c3;
3053                        c2 = JIS_X_0201;
3054                        SEND;
3055                    }else{
3056                        (*i_ungetc)(c3, f);
3057                        /* lonely ESC  */
3058                        (*oconv)(0, ESC);
3059                        SEND;
3060                    }
3061                 } else {
3062                     /* lonely ESC  */
3063                     (*oconv)(0, ESC);
3064                     SEND;
3065                 }
3066             } else if (c1 == ESC && iconv == s_iconv) {
3067                 /* ESC in Shift_JIS */
3068                 if ((c1 = (*i_getc)(f)) == EOF) {
3069                     /*  (*oconv)(0, ESC); don't send bogus code */
3070                     LAST;
3071                 } else if (c1 == '$') {
3072                     /* J-PHONE emoji */
3073                     if ((c1 = (*i_getc)(f)) == EOF) {
3074                         /*
3075                            (*oconv)(0, ESC); don't send bogus code
3076                            (*oconv)(0, '$'); */
3077                         LAST;
3078                     } else {
3079                         if (('E' <= c1 && c1 <= 'G') ||
3080                             ('O' <= c1 && c1 <= 'Q')) {
3081                             /*
3082                                NUM : 0 1 2 3 4 5
3083                                BYTE: G E F O P Q
3084                                C%7 : 1 6 0 2 3 4
3085                                C%7 : 0 1 2 3 4 5 6
3086                                NUM : 2 0 3 4 5 X 1
3087                              */
3088                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3089                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3090                             while ((c1 = (*i_getc)(f)) != EOF) {
3091                                 if (SP <= c1 && c1 <= 'z') {
3092                                     (*oconv)(0, c1 + c0);
3093                                 } else break; /* c1 == SO */
3094                             }
3095                         }
3096                     }
3097                     if (c1 == EOF) LAST;
3098                     NEXT;
3099                 } else {
3100                     /* lonely ESC  */
3101                     (*oconv)(0, ESC);
3102                     SEND;
3103                 }
3104             } else if (c1 == LF || c1 == CR) {
3105                 if (broken_f&4) {
3106                     input_mode = ASCII; set_iconv(FALSE, 0);
3107                     SEND;
3108                 } else if (mime_decode_f && !mime_decode_mode){
                    /* A line break followed by SP is dropped: the SP is
                       pushed back and the loop restarts.  Otherwise the
                       look-ahead is pushed back and the break is sent. */
3109                     if (c1 == LF) {
3110                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3111                             i_ungetc(SP,f);
3112                             continue;
3113                         } else {
3114                             i_ungetc(c1,f);
3115                         }
3116                         c1 = LF;
3117                         SEND;
3118                     } else  { /* if (c1 == CR)*/
3119                         if ((c1=(*i_getc)(f))!=EOF) {
3120                             if (c1==SP) {
3121                                 i_ungetc(SP,f);
3122                                 continue;
3123                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3124                                 i_ungetc(SP,f);
3125                                 continue;
3126                             } else {
3127                                 i_ungetc(c1,f);
3128                             }
                            /* NOTE(review): LF is pushed back here even
                               when the byte after CR was not LF - confirm
                               this is intended. */
3129                             i_ungetc(LF,f);
3130                         } else {
3131                             i_ungetc(c1,f);
3132                         }
3133                         c1 = CR;
3134                         SEND;
3135                     }
3136                 }
3137             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3138                 /* CP5022x */
3139                 c2 = c1;
3140                 NEXT;
3141             } else
3142                 SEND;
3143         }
3144         /* send: */
        /* Reached by every SEND above.  In ASCII input mode the pair goes
           to iconv, which may return -1 or -2 to ask for one or two more
           bytes; in the other modes it goes straight to oconv. */
3145         switch(input_mode){
3146         case ASCII:
3147             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3148             case -2:
3149                 /* 4 bytes UTF-8 */
3150                 if ((c0 = (*i_getc)(f)) != EOF) {
3151                     code_status(c0);
3152                     c0 <<= 8;
3153                     if ((c3 = (*i_getc)(f)) != EOF) {
3154                         code_status(c3);
3155                         (*iconv)(c2, c1, c0|c3);
3156                     }
3157                 }
3158                 break;
3159             case -1:
3160                 /* 3 bytes EUC or UTF-8 */
3161                 if ((c0 = (*i_getc)(f)) != EOF) {
3162                     code_status(c0);
3163                     (*iconv)(c2, c1, c0);
3164                 }
3165                 break;
3166             }
3167             break;
3168         case JIS_X_0208:
3169         case JIS_X_0213_1:
3170             if (ms_ucs_map_f &&
3171                 0x7F <= c2 && c2 <= 0x92 &&
3172                 0x21 <= c1 && c1 <= 0x7E) {
3173                 /* CP932 UDC */
                /* NOTE(review): c1 is limited to 0x21..0x7E by the test
                   above, so the comparison below cannot be true. */
3174                 if(c1 == 0x7F) return 0;
3175                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3176                 c2 = 0;
3177             }
3178             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3179             break;
3180 #ifdef X0212_ENABLE
3181         case JIS_X_0212:
3182             (*oconv)(PREFIX_EUCG3 | c2, c1);
3183             break;
3184 #endif /* X0212_ENABLE */
3185         case JIS_X_0213_2:
3186             (*oconv)(PREFIX_EUCG3 | c2, c1);
3187             break;
3188         default:
3189             (*oconv)(input_mode, c1);  /* other special case */
3190         }
3191
        /* the character has been sent: clear the pending bytes */
3192         c2 = 0;
3193         c0 = 0;
3194         continue;
3195         /* goto next_word */
3196     }
3197
3198     /* epilogue */
    /* Pass EOF to the input converter; then, if no input code name has
       been recorded and is_8bit is set, record the name of the
       input_code_list entry with the lowest score. */
3199     (*iconv)(EOF, 0, 0);
3200     if (!input_codename)
3201     {
3202         if (is_8bit) {
3203             struct input_code *p = input_code_list;
3204             struct input_code *result = p;
3205             while (p->name){
3206                 if (p->score < result->score) result = p;
3207                 ++p;
3208             }
3209             set_input_codename(result->name);
3210 #ifdef CHECK_OPTION
3211             debug(result->name);
3212 #endif
3213         }
3214     }
3215     return 1;
3216 }
3217
3218 nkf_char
3219 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3220 {
3221     nkf_char ret, c3, c0;
3222     int hold_index;
3223
3224
3225     /** it must NOT be in the kanji shifte sequence      */
3226     /** it must NOT be written in JIS7                   */
3227     /** and it must be after 2 byte 8bit code            */
3228
3229     hold_count = 0;
3230     push_hold_buf(c2);
3231     push_hold_buf(c1);
3232
3233     while ((c1 = (*i_getc)(f)) != EOF) {
3234         if (c1 == ESC){
3235             (*i_ungetc)(c1,f);
3236             break;
3237         }
3238         code_status(c1);
3239         if (push_hold_buf(c1) == EOF || estab_f){
3240             break;
3241         }
3242     }
3243
3244     if (!estab_f){
3245         struct input_code *p = input_code_list;
3246         struct input_code *result = p;
3247         if (c1 == EOF){
3248             code_status(c1);
3249         }
3250         while (p->name){
3251             if (p->status_func && p->score < result->score){
3252                 result = p;
3253             }
3254             ++p;
3255         }
3256         set_iconv(TRUE, result->iconv_func);
3257     }
3258
3259
3260     /** now,
3261      ** 1) EOF is detected, or
3262      ** 2) Code is established, or
3263      ** 3) Buffer is FULL (but last word is pushed)
3264      **
3265      ** in 1) and 3) cases, we continue to use
3266      ** Kanji codes by oconv and leave estab_f unchanged.
3267      **/
3268
3269     ret = c1;
3270     hold_index = 0;
3271     while (hold_index < hold_count){
3272         c2 = hold_buf[hold_index++];
3273         if (c2 <= DEL
3274 #ifdef NUMCHAR_OPTION
3275             || is_unicode_capsule(c2)
3276 #endif
3277             ){
3278             (*iconv)(0, c2, 0);
3279             continue;
3280         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3281             (*iconv)(JIS_X_0201, c2, 0);
3282             continue;
3283         }
3284         if (hold_index < hold_count){
3285             c1 = hold_buf[hold_index++];
3286         }else{
3287             c1 = (*i_getc)(f);
3288             if (c1 == EOF){
3289                 c3 = EOF;
3290                 break;
3291             }
3292             code_status(c1);
3293         }
3294         c0 = 0;
3295         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3296         case -2:
3297             /* 4 bytes UTF-8 */
3298             if (hold_index < hold_count){
3299                 c0 = hold_buf[hold_index++];
3300             } else if ((c0 = (*i_getc)(f)) == EOF) {
3301                 ret = EOF;
3302                 break;
3303             } else {
3304                 code_status(c0);
3305                 c0 <<= 8;
3306                 if (hold_index < hold_count){
3307                     c3 = hold_buf[hold_index++];
3308                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3309                     c0 = ret = EOF;
3310                     break;
3311                 } else {
3312                     code_status(c3);
3313                     (*iconv)(c2, c1, c0|c3);
3314                 }
3315             }
3316             break;
3317         case -1:
3318             /* 3 bytes EUC or UTF-8 */
3319             if (hold_index < hold_count){
3320                 c0 = hold_buf[hold_index++];
3321             } else if ((c0 = (*i_getc)(f)) == EOF) {
3322                 ret = EOF;
3323                 break;
3324             } else {
3325                 code_status(c0);
3326             }
3327             (*iconv)(c2, c1, c0);
3328             break;
3329         }
3330         if (c0 == EOF) break;
3331     }
3332     return ret;
3333 }
3334
3335 nkf_char push_hold_buf(nkf_char c2)
3336 {
3337     if (hold_count >= HOLD_SIZE*2)
3338         return (EOF);
3339     hold_buf[hold_count++] = (unsigned char)c2;
3340     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3341 }
3342
3343 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3344 {
3345 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3346     nkf_char val;
3347 #endif
3348     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3349 #ifdef SHIFTJIS_CP932
3350     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3351         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3352         if (val){
3353             c2 = val >> 8;
3354             c1 = val & 0xff;
3355         }
3356     }
3357     if (cp932inv_f
3358         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3359         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3360         if (c){
3361             c2 = c >> 8;
3362             c1 = c & 0xff;
3363         }
3364     }
3365 #endif /* SHIFTJIS_CP932 */
3366 #ifdef X0212_ENABLE
3367     if (!x0213_f && is_ibmext_in_sjis(c2)){
3368         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3369         if (val){
3370             if (val > 0x7FFF){
3371                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3372                 c1 = val & 0xff;
3373             }else{
3374                 c2 = val >> 8;
3375                 c1 = val & 0xff;
3376             }
3377             if (p2) *p2 = c2;
3378             if (p1) *p1 = c1;
3379             return 0;
3380         }
3381     }
3382 #endif
3383     if(c2 >= 0x80){
3384         if(x0213_f && c2 >= 0xF0){
3385             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3386                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3387             }else{ /* 78<=k<=94 */
3388                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3389                 if (0x9E < c1) c2++;
3390             }
3391         }else{
3392             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3393             if (0x9E < c1) c2++;
3394         }
3395         if (c1 < 0x9F)
3396             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3397         else {
3398             c1 = c1 - 0x7E;
3399         }
3400     }
3401
3402 #ifdef X0212_ENABLE
3403     c2 = x0212_unshift(c2);
3404 #endif
3405     if (p2) *p2 = c2;
3406     if (p1) *p1 = c1;
3407     return 0;
3408 }
3409
3410 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3411 {
3412     if (c2 == JIS_X_0201) {
3413         c1 &= 0x7f;
3414     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3415         /* NOP */
3416     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3417         /* CP932 UDC */
3418         if(c1 == 0x7F) return 0;
3419         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3420         c2 = 0;
3421     } else {
3422         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3423         if (ret) return ret;
3424     }
3425     (*oconv)(c2, c1);
3426     return 0;
3427 }
3428
3429 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3430 {
3431     if (c2 == JIS_X_0201) {
3432         c1 &= 0x7f;
3433 #ifdef X0212_ENABLE
3434     }else if (c2 == 0x8f){
3435         if (c0 == 0){
3436             return -1;
3437         }
3438         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3439             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3440             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3441             c2 = 0;
3442         } else {
3443             c2 = (c2 << 8) | (c1 & 0x7f);
3444             c1 = c0 & 0x7f;
3445 #ifdef SHIFTJIS_CP932
3446             if (cp51932_f){
3447                 nkf_char s2, s1;
3448                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3449                     s2e_conv(s2, s1, &c2, &c1);
3450                     if (c2 < 0x100){
3451                         c1 &= 0x7f;
3452                         c2 &= 0x7f;
3453                     }
3454                 }
3455             }
3456 #endif /* SHIFTJIS_CP932 */
3457         }
3458 #endif /* X0212_ENABLE */
3459     } else if (c2 == SSO){
3460         c2 = JIS_X_0201;
3461         c1 &= 0x7f;
3462     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3463         /* NOP */
3464     } else {
3465         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3466             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3467             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3468             c2 = 0;
3469         } else {
3470             c1 &= 0x7f;
3471             c2 &= 0x7f;
3472 #ifdef SHIFTJIS_CP932
3473             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3474                 nkf_char s2, s1;
3475                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3476                     s2e_conv(s2, s1, &c2, &c1);
3477                     if (c2 < 0x100){
3478                         c1 &= 0x7f;
3479                         c2 &= 0x7f;
3480                     }
3481                 }
3482             }
3483 #endif /* SHIFTJIS_CP932 */
3484         }
3485     }
3486     (*oconv)(c2, c1);
3487     return 0;
3488 }
3489
3490 #ifdef UTF8_INPUT_ENABLE
3491 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3492 {
3493     nkf_char ret = 0;
3494
3495     if (!c1){
3496         *p2 = 0;
3497         *p1 = c2;
3498     }else if (0xc0 <= c2 && c2 <= 0xef) {
3499         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3500 #ifdef NUMCHAR_OPTION
3501         if (ret > 0){
3502             if (p2) *p2 = 0;
3503             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3504             ret = 0;
3505         }
3506 #endif
3507     }
3508     return ret;
3509 }
3510
3511 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3512 {
3513     nkf_char ret = 0;
3514     static const char w_iconv_utf8_1st_byte[] =
3515     { /* 0xC0 - 0xFF */
3516         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3517         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3518         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3519         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3520
3521     if (c2 < 0 || 0xff < c2) {
3522     }else if (c2 == 0) { /* 0 : 1 byte*/
3523         c0 = 0;
3524     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3525         return 0;
3526     } else{
3527         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3528         case 21:
3529             if (c1 < 0x80 || 0xBF < c1) return 0;
3530             break;
3531         case 30:
3532             if (c0 == 0) return -1;
3533             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3534                 return 0;
3535             break;
3536         case 31:
3537         case 33:
3538             if (c0 == 0) return -1;
3539             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3540                 return 0;
3541             break;
3542         case 32:
3543             if (c0 == 0) return -1;
3544             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3545                 return 0;
3546             break;
3547         case 40:
3548             if (c0 == 0) return -2;
3549             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3550                 return 0;
3551             break;
3552         case 41:
3553             if (c0 == 0) return -2;
3554             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3555                 return 0;
3556             break;
3557         case 42:
3558             if (c0 == 0) return -2;
3559             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3560                 return 0;
3561             break;
3562         default:
3563             return 0;
3564             break;
3565         }
3566     }
3567     if (c2 == 0 || c2 == EOF){
3568     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3569         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3570         c2 = 0;
3571     } else {
3572         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3573     }
3574     if (ret == 0){
3575         (*oconv)(c2, c1);
3576     }
3577     return ret;
3578 }
3579 #endif
3580
3581 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3582 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3583 {
3584     val &= VALUE_MASK;
3585     if (val < 0x80){
3586         *p2 = val;
3587         *p1 = 0;
3588         *p0 = 0;
3589     }else if (val < 0x800){
3590         *p2 = 0xc0 | (val >> 6);
3591         *p1 = 0x80 | (val & 0x3f);
3592         *p0 = 0;
3593     } else if (val <= NKF_INT32_C(0xFFFF)) {
3594         *p2 = 0xe0 | (val >> 12);
3595         *p1 = 0x80 | ((val >> 6) & 0x3f);
3596         *p0 = 0x80 | (val        & 0x3f);
3597     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3598         *p2 = 0xe0 |  (val >> 16);
3599         *p1 = 0x80 | ((val >> 12) & 0x3f);
3600         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3601     } else {
3602         *p2 = 0;
3603         *p1 = 0;
3604         *p0 = 0;
3605     }
3606 }
3607 #endif
3608
3609 #ifdef UTF8_INPUT_ENABLE
3610 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3611 {
3612     nkf_char val;
3613     if (c2 >= 0xf8) {
3614         val = -1;
3615     } else if (c2 >= 0xf0){
3616         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3617         val = (c2 & 0x0f) << 18;
3618         val |= (c1 & 0x3f) << 12;
3619         val |= (c0 & 0x3f00) >> 2;
3620         val |= (c0 & 0x3f);
3621     }else if (c2 >= 0xe0){
3622         val = (c2 & 0x0f) << 12;
3623         val |= (c1 & 0x3f) << 6;
3624         val |= (c0 & 0x3f);
3625     }else if (c2 >= 0xc0){
3626         val = (c2 & 0x1f) << 6;
3627         val |= (c1 & 0x3f);
3628     }else{
3629         val = c2;
3630     }
3631     return val;
3632 }
3633
3634 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3635 {
3636     nkf_char c2, c1, c0;
3637     nkf_char ret = 0;
3638     val &= VALUE_MASK;
3639     if (val < 0x80){
3640         *p2 = 0;
3641         *p1 = val;
3642     }else{
3643         w16w_conv(val, &c2, &c1, &c0);
3644         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3645 #ifdef NUMCHAR_OPTION
3646         if (ret > 0){
3647             *p2 = 0;
3648             *p1 = CLASS_UNICODE | val;
3649             ret = 0;
3650         }
3651 #endif
3652     }
3653     return ret;
3654 }
3655 #endif
3656
3657 #ifdef UTF8_INPUT_ENABLE
3658 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3659 {
3660     nkf_char ret = 0;
3661     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3662         (*oconv)(c2, c1);
3663         return 0;
3664     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3665         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3666             return -2;
3667         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3668         c2 = 0;
3669     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3670         /*
3671            return 2;
3672         */
3673         return 1;
3674     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3675     if (ret) return ret;
3676     (*oconv)(c2, c1);
3677     return 0;
3678 }
3679
3680 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3681 {
3682     int ret = 0;
3683
3684     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3685     } else if (is_unicode_bmp(c1)) {
3686         ret = w16e_conv(c1, &c2, &c1);
3687     } else {
3688         c2 = 0;
3689         c1 =  CLASS_UNICODE | c1;
3690     }
3691     if (ret) return ret;
3692     (*oconv)(c2, c1);
3693     return 0;
3694 }
3695
3696 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3697 {
3698     const unsigned short *const *pp;
3699     const unsigned short *const *const *ppp;
3700     static const char no_best_fit_chars_table_C2[] =
3701     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3702         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3703         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3704         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3705     static const char no_best_fit_chars_table_C2_ms[] =
3706     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3707         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3708         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3709         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3710     static const char no_best_fit_chars_table_932_C2[] =
3711     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3712         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3713         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3714         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3715     static const char no_best_fit_chars_table_932_C3[] =
3716     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3717         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3718         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3719         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3720     nkf_char ret = 0;
3721
3722     if(c2 < 0x80){
3723         *p2 = 0;
3724         *p1 = c2;
3725     }else if(c2 < 0xe0){
3726         if(no_best_fit_chars_f){
3727             if(ms_ucs_map_f == UCS_MAP_CP932){
3728                 switch(c2){
3729                 case 0xC2:
3730                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3731                     break;
3732                 case 0xC3:
3733                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3734                     break;
3735                 }
3736             }else if(!cp932inv_f){
3737                 switch(c2){
3738                 case 0xC2:
3739                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3740                     break;
3741                 case 0xC3:
3742                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3743                     break;
3744                 }
3745             }else if(ms_ucs_map_f == UCS_MAP_MS){
3746                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3747             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3748                 switch(c2){
3749                 case 0xC2:
3750                     switch(c1){
3751                     case 0xA2:
3752                     case 0xA3:
3753                     case 0xA5:
3754                     case 0xA6:
3755                     case 0xAC:
3756                     case 0xAF:
3757                     case 0xB8:
3758                         return 1;
3759                     }
3760                     break;
3761                 }
3762             }
3763         }
3764         pp =
3765             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3766             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3767             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3768             utf8_to_euc_2bytes;
3769         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3770     }else if(c0 < 0xF0){
3771         if(no_best_fit_chars_f){
3772             if(ms_ucs_map_f == UCS_MAP_CP932){
3773                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3774             }else if(ms_ucs_map_f == UCS_MAP_MS){
3775                 switch(c2){
3776                 case 0xE2:
3777                     switch(c1){
3778                     case 0x80:
3779                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3780                         break;
3781                     case 0x88:
3782                         if(c0 == 0x92) return 1;
3783                         break;
3784                     }
3785                     break;
3786                 case 0xE3:
3787                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3788                     break;
3789                 }
3790             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3791                 switch(c2){
3792                 case 0xE3:
3793                     switch(c1){
3794                     case 0x82:
3795                             if(c0 == 0x94) return 1;
3796                         break;
3797                     case 0x83:
3798                             if(c0 == 0xBB) return 1;
3799                         break;
3800                     }
3801                     break;
3802                 }
3803             }else{
3804                 switch(c2){
3805                 case 0xE2:
3806                     switch(c1){
3807                     case 0x80:
3808                         if(c0 == 0x95) return 1;
3809                         break;
3810                     case 0x88:
3811                         if(c0 == 0xA5) return 1;
3812                         break;
3813                     }
3814                     break;
3815                 case 0xEF:
3816                     switch(c1){
3817                     case 0xBC:
3818                         if(c0 == 0x8D) return 1;
3819                         break;
3820                     case 0xBD:
3821                         if(c0 == 0x9E && !cp932inv_f) return 1;
3822                         break;
3823                     case 0xBF:
3824                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3825                         break;
3826                     }
3827                     break;
3828                 }
3829             }
3830         }
3831         ppp =
3832             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3833             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3834             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3835             utf8_to_euc_3bytes;
3836         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3837     }else return -1;
3838 #ifdef SHIFTJIS_CP932
3839     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3840         nkf_char s2, s1;
3841         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3842             s2e_conv(s2, s1, p2, p1);
3843         }else{
3844             ret = 1;
3845         }
3846     }
3847 #endif
3848     return ret;
3849 }
3850
3851 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3852 {
3853     nkf_char c2;
3854     const unsigned short *p;
3855     unsigned short val;
3856
3857     if (pp == 0) return 1;
3858
3859     c1 -= 0x80;
3860     if (c1 < 0 || psize <= c1) return 1;
3861     p = pp[c1];
3862     if (p == 0)  return 1;
3863
3864     c0 -= 0x80;
3865     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3866     val = p[c0];
3867     if (val == 0) return 1;
3868     if (no_cp932ext_f && (
3869         (val>>8) == 0x2D || /* NEC special characters */
3870         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3871         )) return 1;
3872
3873     c2 = val >> 8;
3874    if (val > 0x7FFF){
3875         c2 &= 0x7f;
3876         c2 |= PREFIX_EUCG3;
3877     }
3878     if (c2 == SO) c2 = JIS_X_0201;
3879     c1 = val & 0x7f;
3880     if (p2) *p2 = c2;
3881     if (p1) *p1 = c1;
3882     return 0;
3883 }
3884
3885 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3886 {
3887     int shift = 20;
3888     c &= VALUE_MASK;
3889     while(shift >= 0){
3890         if(c >= 1<<shift){
3891             while(shift >= 0){
3892                 (*f)(0, bin2hex(c>>shift));
3893                 shift -= 4;
3894             }
3895         }else{
3896             shift -= 4;
3897         }
3898     }
3899     return;
3900 }
3901
3902 void encode_fallback_html(nkf_char c)
3903 {
3904     (*oconv)(0, '&');
3905     (*oconv)(0, '#');
3906     c &= VALUE_MASK;
3907     if(c >= NKF_INT32_C(1000000))
3908         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3909     if(c >= NKF_INT32_C(100000))
3910         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3911     if(c >= 10000)
3912         (*oconv)(0, 0x30+(c/10000  )%10);
3913     if(c >= 1000)
3914         (*oconv)(0, 0x30+(c/1000   )%10);
3915     if(c >= 100)
3916         (*oconv)(0, 0x30+(c/100    )%10);
3917     if(c >= 10)
3918         (*oconv)(0, 0x30+(c/10     )%10);
3919     if(c >= 0)
3920         (*oconv)(0, 0x30+ c         %10);
3921     (*oconv)(0, ';');
3922     return;
3923 }
3924
3925 void encode_fallback_xml(nkf_char c)
3926 {
3927     (*oconv)(0, '&');
3928     (*oconv)(0, '#');
3929     (*oconv)(0, 'x');
3930     nkf_each_char_to_hex(oconv, c);
3931     (*oconv)(0, ';');
3932     return;
3933 }
3934
3935 void encode_fallback_java(nkf_char c)
3936 {
3937     (*oconv)(0, '\\');
3938     c &= VALUE_MASK;
3939     if(!is_unicode_bmp(c)){
3940         (*oconv)(0, 'U');
3941         (*oconv)(0, '0');
3942         (*oconv)(0, '0');
3943         (*oconv)(0, bin2hex(c>>20));
3944         (*oconv)(0, bin2hex(c>>16));
3945     }else{
3946         (*oconv)(0, 'u');
3947     }
3948     (*oconv)(0, bin2hex(c>>12));
3949     (*oconv)(0, bin2hex(c>> 8));
3950     (*oconv)(0, bin2hex(c>> 4));
3951     (*oconv)(0, bin2hex(c    ));
3952     return;
3953 }
3954
3955 void encode_fallback_perl(nkf_char c)
3956 {
3957     (*oconv)(0, '\\');
3958     (*oconv)(0, 'x');
3959     (*oconv)(0, '{');
3960     nkf_each_char_to_hex(oconv, c);
3961     (*oconv)(0, '}');
3962     return;
3963 }
3964
3965 void encode_fallback_subchar(nkf_char c)
3966 {
3967     c = unicode_subchar;
3968     (*oconv)((c>>8)&0xFF, c&0xFF);
3969     return;
3970 }
3971 #endif
3972
3973 #ifdef UTF8_OUTPUT_ENABLE
3974 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3975 {
3976     const unsigned short *p;
3977
3978     if (c2 == JIS_X_0201) {
3979         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3980             switch (c1) {
3981             case 0x20:
3982                 return 0xA0;
3983             case 0x7D:
3984                 return 0xA9;
3985             }
3986         }
3987         p = euc_to_utf8_1byte;
3988 #ifdef X0212_ENABLE
3989     } else if (is_eucg3(c2)){
3990         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3991             return 0xA6;
3992         }
3993         c2 = (c2&0x7f) - 0x21;
3994         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3995             p = x0212_to_utf8_2bytes[c2];
3996         else
3997             return 0;
3998 #endif
3999     } else {
4000         c2 &= 0x7f;
4001         c2 = (c2&0x7f) - 0x21;
4002         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4003             p =
4004                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4005                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4006                 euc_to_utf8_2bytes_ms[c2];
4007         else
4008             return 0;
4009     }
4010     if (!p) return 0;
4011     c1 = (c1 & 0x7f) - 0x21;
4012     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4013         return p[c1];
4014     return 0;
4015 }
4016
4017 void w_oconv(nkf_char c2, nkf_char c1)
4018 {
4019     nkf_char c0;
4020     nkf_char val;
4021
4022     if (output_bom_f) {
4023         output_bom_f = FALSE;
4024         (*o_putc)('\357');
4025         (*o_putc)('\273');
4026         (*o_putc)('\277');
4027     }
4028
4029     if (c2 == EOF) {
4030         (*o_putc)(EOF);
4031         return;
4032     }
4033
4034 #ifdef NUMCHAR_OPTION
4035     if (c2 == 0 && is_unicode_capsule(c1)){
4036         val = c1 & VALUE_MASK;
4037         if (val < 0x80){
4038             (*o_putc)(val);
4039         }else if (val < 0x800){
4040             (*o_putc)(0xC0 | (val >> 6));
4041             (*o_putc)(0x80 | (val & 0x3f));
4042         } else if (val <= NKF_INT32_C(0xFFFF)) {
4043             (*o_putc)(0xE0 | (val >> 12));
4044             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4045             (*o_putc)(0x80 | (val        & 0x3f));
4046         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4047             (*o_putc)(0xF0 | ( val>>18));
4048             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4049             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4050             (*o_putc)(0x80 | ( val      & 0x3f));
4051         }
4052         return;
4053     }
4054 #endif
4055
4056     if (c2 == 0) {
4057         output_mode = ASCII;
4058         (*o_putc)(c1);
4059     } else if (c2 == ISO_8859_1) {
4060         output_mode = UTF_8;
4061         (*o_putc)(c1 | 0x080);
4062     } else {
4063         output_mode = UTF_8;
4064         val = e2w_conv(c2, c1);
4065         if (val){
4066             w16w_conv(val, &c2, &c1, &c0);
4067             (*o_putc)(c2);
4068             if (c1){
4069                 (*o_putc)(c1);
4070                 if (c0) (*o_putc)(c0);
4071             }
4072         }
4073     }
4074 }
4075
4076 void w_oconv16(nkf_char c2, nkf_char c1)
4077 {
4078     if (output_bom_f) {
4079         output_bom_f = FALSE;
4080         if (output_endian == ENDIAN_LITTLE){
4081             (*o_putc)((unsigned char)'\377');
4082             (*o_putc)('\376');
4083         }else{
4084             (*o_putc)('\376');
4085             (*o_putc)((unsigned char)'\377');
4086         }
4087     }
4088
4089     if (c2 == EOF) {
4090         (*o_putc)(EOF);
4091         return;
4092     }
4093
4094     if (c2 == ISO_8859_1) {
4095         c2 = 0;
4096         c1 |= 0x80;
4097 #ifdef NUMCHAR_OPTION
4098     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4099         if (is_unicode_bmp(c1)) {
4100             c2 = (c1 >> 8) & 0xff;
4101             c1 &= 0xff;
4102         } else {
4103             c1 &= VALUE_MASK;
4104             if (c1 <= UNICODE_MAX) {
4105                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4106                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4107                 if (output_endian == ENDIAN_LITTLE){
4108                     (*o_putc)(c2 & 0xff);
4109                     (*o_putc)((c2 >> 8) & 0xff);
4110                     (*o_putc)(c1 & 0xff);
4111                     (*o_putc)((c1 >> 8) & 0xff);
4112                 }else{
4113                     (*o_putc)((c2 >> 8) & 0xff);
4114                     (*o_putc)(c2 & 0xff);
4115                     (*o_putc)((c1 >> 8) & 0xff);
4116                     (*o_putc)(c1 & 0xff);
4117                 }
4118             }
4119             return;
4120         }
4121 #endif
4122     } else if (c2) {
4123         nkf_char val = e2w_conv(c2, c1);
4124         c2 = (val >> 8) & 0xff;
4125         c1 = val & 0xff;
4126         if (!val) return;
4127     }
4128     if (output_endian == ENDIAN_LITTLE){
4129         (*o_putc)(c1);
4130         (*o_putc)(c2);
4131     }else{
4132         (*o_putc)(c2);
4133         (*o_putc)(c1);
4134     }
4135 }
4136
4137 void w_oconv32(nkf_char c2, nkf_char c1)
4138 {
4139     if (output_bom_f) {
4140         output_bom_f = FALSE;
4141         if (output_endian == ENDIAN_LITTLE){
4142             (*o_putc)((unsigned char)'\377');
4143             (*o_putc)('\376');
4144             (*o_putc)('\000');
4145             (*o_putc)('\000');
4146         }else{
4147             (*o_putc)('\000');
4148             (*o_putc)('\000');
4149             (*o_putc)('\376');
4150             (*o_putc)((unsigned char)'\377');
4151         }
4152     }
4153
4154     if (c2 == EOF) {
4155         (*o_putc)(EOF);
4156         return;
4157     }
4158
4159     if (c2 == ISO_8859_1) {
4160         c1 |= 0x80;
4161 #ifdef NUMCHAR_OPTION
4162     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4163         c1 &= VALUE_MASK;
4164 #endif
4165     } else if (c2) {
4166         c1 = e2w_conv(c2, c1);
4167         if (!c1) return;
4168     }
4169     if (output_endian == ENDIAN_LITTLE){
4170         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4171         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4172         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4173         (*o_putc)('\000');
4174     }else{
4175         (*o_putc)('\000');
4176         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4177         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4178         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4179     }
4180 }
4181 #endif
4182
4183 void e_oconv(nkf_char c2, nkf_char c1)
4184 {
4185 #ifdef NUMCHAR_OPTION
4186     if (c2 == 0 && is_unicode_capsule(c1)){
4187         w16e_conv(c1, &c2, &c1);
4188         if (c2 == 0 && is_unicode_capsule(c1)){
4189             c2 = c1 & VALUE_MASK;
4190             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4191                 /* eucJP-ms UDC */
4192                 c1 &= 0xFFF;
4193                 c2 = c1 / 94;
4194                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4195                 c1 = 0x21 + c1 % 94;
4196                 if (is_eucg3(c2)){
4197                     (*o_putc)(0x8f);
4198                     (*o_putc)((c2 & 0x7f) | 0x080);
4199                     (*o_putc)(c1 | 0x080);
4200                 }else{
4201                     (*o_putc)((c2 & 0x7f) | 0x080);
4202                     (*o_putc)(c1 | 0x080);
4203                 }
4204                 return;
4205             } else {
4206                 if (encode_fallback) (*encode_fallback)(c1);
4207                 return;
4208             }
4209         }
4210     }
4211 #endif
4212     if (c2 == EOF) {
4213         (*o_putc)(EOF);
4214         return;
4215     } else if (c2 == 0) {
4216         output_mode = ASCII;
4217         (*o_putc)(c1);
4218     } else if (c2 == JIS_X_0201) {
4219         output_mode = EUC_JP;
4220         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4221     } else if (c2 == ISO_8859_1) {
4222         output_mode = ISO_8859_1;
4223         (*o_putc)(c1 | 0x080);
4224 #ifdef X0212_ENABLE
4225     } else if (is_eucg3(c2)){
4226         output_mode = EUC_JP;
4227 #ifdef SHIFTJIS_CP932
4228         if (!cp932inv_f){
4229             nkf_char s2, s1;
4230        &nb