OSDN Git Service

* input/output encoding refactoring. (for Ruby M17N)
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.155 2007/12/19 04:18:39 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-19"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
58 #if DEFAULT_NEWLINE == 0x0D0A
59 #define PUT_NEWLINE(func) do {\
60     func(0x0D);\
61     func(0x0A);\
62 } while (0)
63 #define OCONV_NEWLINE(func) do {\
64     func(0, 0x0D);\
65     func(0, 0x0A);\
66 } while (0)
67 #elif DEFAULT_NEWLINE == 0x0D
68 #define PUT_NEWLINE(func) func(0x0D)
69 #define OCONV_NEWLINE(func) func(0, 0x0D)
70 #else
71 #define DEFAULT_NEWLINE 0x0A
72 #define PUT_NEWLINE(func) func(0x0A)
73 #define OCONV_NEWLINE(func) func(0, 0x0A)
74 #endif
75
76 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
77 #define MSDOS
78 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
79 #define __WIN32__
80 #endif
81 #endif
82
83 #ifdef PERL_XS
84 #undef OVERWRITE
85 #endif
86
87 #ifndef PERL_XS
88 #include <stdio.h>
89 #endif
90
91 #include <stdlib.h>
92 #include <string.h>
93
94 #if defined(MSDOS) || defined(__OS2__)
95 #include <fcntl.h>
96 #include <io.h>
97 #if defined(_MSC_VER) || defined(__WATCOMC__)
98 #define mktemp _mktemp
99 #endif
100 #endif
101
102 #ifdef MSDOS
103 #ifdef LSI_C
104 #define setbinmode(fp) fsetbin(fp)
105 #elif defined(__DJGPP__)
106 #include <libc/dosio.h>
107 #define setbinmode(fp) djgpp_setbinmode(fp)
108 #else /* Microsoft C, Turbo C */
109 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
110 #endif
111 #else /* UNIX */
112 #define setbinmode(fp)
113 #endif
114
115 #if defined(__DJGPP__)
116 void  djgpp_setbinmode(FILE *fp)
117 {
118     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
119     int fd, m;
120     fd = fileno(fp);
121     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
122     __file_handle_set(fd, m);
123 }
124 #endif
125
126 #ifdef _IOFBF /* SysV and MSDOS, Windows */
127 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
128 #else /* BSD */
129 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
130 #endif
131
132 /*Borland C++ 4.5 EasyWin*/
133 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
134 #define         EASYWIN
135 #ifndef __WIN16__
136 #define __WIN16__
137 #endif
138 #include <windows.h>
139 #endif
140
141 #ifdef OVERWRITE
142 /* added by satoru@isoternet.org */
143 #if defined(__EMX__)
144 #include <sys/types.h>
145 #endif
146 #include <sys/stat.h>
147 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
148 #include <unistd.h>
149 #if defined(__WATCOMC__)
150 #include <sys/utime.h>
151 #else
152 #include <utime.h>
153 #endif
154 #else /* defined(MSDOS) */
155 #ifdef __WIN32__
156 #ifdef __BORLANDC__ /* BCC32 */
157 #include <utime.h>
158 #else /* !defined(__BORLANDC__) */
159 #include <sys/utime.h>
160 #endif /* (__BORLANDC__) */
161 #else /* !defined(__WIN32__) */
162 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
163 #include <sys/utime.h>
164 #elif defined(__TURBOC__) /* BCC */
165 #include <utime.h>
166 #elif defined(LSI_C) /* LSI C */
167 #endif /* (__WIN32__) */
168 #endif
169 #endif
170 #endif
171
172 #define         FALSE   0
173 #define         TRUE    1
174
175 /* state of output_mode and input_mode
176
177    c2           0 means ASCII
178                 JIS_X_0201
179                 ISO_8859_1
180                 JIS_X_0208
181                 EOF      all termination
182    c1           32bit data
183
184  */
185
186 /* Input Assumption */
187
188 #define         JIS_INPUT       4
189 #define         EUC_INPUT      16
190 #define         SJIS_INPUT      5
191 #define         LATIN1_INPUT    6
192 #define         UTF8_INPUT     13
193 #define         UTF16_INPUT    1015
194 #define         UTF32_INPUT    1017
195
196 #define         FIXED_MIME      7
197 #define         STRICT_MIME     8
198
199 /* MIME ENCODE */
200
201
202 /* byte order */
203
204 #define         ENDIAN_BIG      1234
205 #define         ENDIAN_LITTLE   4321
206 #define         ENDIAN_2143     2143
207 #define         ENDIAN_3412     3412
208
209 /* ASCII CODE */
210
211 #define         BS      0x08
212 #define         TAB     0x09
213 #define         LF      0x0a
214 #define         CR      0x0d
215 #define         ESC     0x1b
216 #define         SP      0x20
217 #define         AT      0x40
218 #define         SSP     0xa0
219 #define         DEL     0x7f
220 #define         SI      0x0f
221 #define         SO      0x0e
222 #define         SSO     0x8e
223 #define         SS3     0x8f
224 #define         CRLF    0x0D0A
225
226
227 /* encodings */
228
229 enum nkf_encodings {
230     ASCII,
231     JIS_X_0208,
232     JIS_X_0201,
233     ISO_8859_1,
234     ISO_2022_JP,
235     CP50220,
236     CP50221,
237     CP50222,
238     ISO_2022_JP_1,
239     ISO_2022_JP_3,
240     SHIFT_JIS,
241     WINDOWS_31J,
242     CP10001,
243     EUC_JP,
244     CP51932,
245     EUCJP_MS,
246     EUCJP_ASCII,
247     SHIFT_JISX0213,
248     SHIFT_JIS_2004,
249     EUC_JISX0213,
250     EUC_JIS_2004,
251     UTF_8,
252     UTF_8N,
253     UTF_8_BOM,
254     UTF8_MAC,
255     UTF_16,
256     UTF_16BE,
257     UTF_16BE_BOM,
258     UTF_16LE,
259     UTF_16LE_BOM,
260     UTF_32,
261     UTF_32BE,
262     UTF_32BE_BOM,
263     UTF_32LE,
264     UTF_32LE_BOM,
265     JIS_X_0212=0x2844,
266     JIS_X_0213_1=0x284F,
267     JIS_X_0213_2=0x2850,
268     BINARY
269 };
270 static const struct {
271     const int id;
272     const char *name;
273 } encoding_id_to_name_table[] = {
274     {ASCII,             "ASCII"},
275     {ISO_8859_1,        "ISO-8859-1"},
276     {ISO_2022_JP,       "ISO-2022-JP"},
277     {CP50220,           "CP50220"},
278     {CP50221,           "CP50221"},
279     {CP50222,           "CP50222"},
280     {ISO_2022_JP_1,     "ISO-2022-JP-1"},
281     {ISO_2022_JP_3,     "ISO-2022-JP-3"},
282     {SHIFT_JIS,         "Shift_JIS"},
283     {WINDOWS_31J,       "WINDOWS-31J"},
284     {CP10001,           "CP10001"},
285     {EUC_JP,            "EUC-JP"},
286     {CP51932,           "CP51932"},
287     {EUCJP_MS,          "eucJP-MS"},
288     {EUCJP_ASCII,       "eucJP-ASCII"},
289     {SHIFT_JISX0213,    "Shift_JISX0213"},
290     {SHIFT_JIS_2004,    "Shift_JIS-2004"},
291     {EUC_JISX0213,      "EUC-JISX0213"},
292     {EUC_JIS_2004,      "EUC-JIS-2004"},
293     {UTF_8,             "UTF-8"},
294     {UTF_8N,            "UTF-8N"},
295     {UTF_8_BOM,         "UTF-8-BOM"},
296     {UTF8_MAC,          "UTF8-MAC"},
297     {UTF_16,            "UTF-16"},
298     {UTF_16BE,          "UTF-16BE"},
299     {UTF_16BE_BOM,      "UTF-16BE-BOM"},
300     {UTF_16LE,          "UTF-16LE"},
301     {UTF_16LE_BOM,      "UTF-16LE-BOM"},
302     {UTF_32,            "UTF-32"},
303     {UTF_32BE,          "UTF-32BE"},
304     {UTF_32BE_BOM,      "UTF-32BE-BOM"},
305     {UTF_32LE,          "UTF-32LE"},
306     {UTF_32LE_BOM,      "UTF-32LE-BOM"},
307     {BINARY,            "BINARY"},
308     {-1,                        ""}
309 };
310 static const struct {
311     const char *name;
312     const int id;
313 } encoding_name_to_id_table[] = {
314     {"ASCII",                   ASCII},
315     {"ISO-2022-JP",             ISO_2022_JP},
316     {"X-ISO2022JP-CP932",       CP50220},
317     {"CP50220",                 CP50220},
318     {"CP50221",                 CP50221},
319     {"CP50222",                 CP50222},
320     {"ISO-2022-JP-1",           ISO_2022_JP_1},
321     {"ISO-2022-JP-3",           ISO_2022_JP_3},
322     {"SHIFT_JIS",               SHIFT_JIS},
323     {"WINDOWS-31J",             WINDOWS_31J},
324     {"CSWINDOWS31J",            WINDOWS_31J},
325     {"CP932",                   WINDOWS_31J},
326     {"MS932",                   WINDOWS_31J},
327     {"CP10001",                 CP10001},
328     {"EUCJP",                   EUC_JP},
329     {"EUC-JP",                  EUC_JP},
330     {"CP51932",                 CP51932},
331     {"EUC-JP-MS",               EUCJP_MS},
332     {"EUCJP-MS",                EUCJP_MS},
333     {"EUCJPMS",                 EUCJP_MS},
334     {"EUC-JP-ASCII",            EUCJP_ASCII},
335     {"EUCJP-ASCII",             EUCJP_ASCII},
336     {"SHIFT_JISX0213",          SHIFT_JISX0213},
337     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
338     {"EUC-JISX0213",            EUC_JISX0213},
339     {"EUC-JIS-2004",            EUC_JIS_2004},
340     {"UTF-8",                   UTF_8},
341     {"UTF-8N",                  UTF_8N},
342     {"UTF-8-BOM",               UTF_8_BOM},
343     {"UTF8-MAC",                UTF8_MAC},
344     {"UTF-8-MAC",               UTF8_MAC},
345     {"UTF-16",                  UTF_16},
346     {"UTF-16BE",                UTF_16BE},
347     {"UTF-16BE-BOM",            UTF_16BE_BOM},
348     {"UTF-16LE",                UTF_16LE},
349     {"UTF-16LE-BOM",            UTF_16LE_BOM},
350     {"UTF-32",                  UTF_32},
351     {"UTF-32BE",                UTF_32BE},
352     {"UTF-32BE-BOM",            UTF_32BE_BOM},
353     {"UTF-32LE",                UTF_32LE},
354     {"UTF-32LE-BOM",            UTF_32LE_BOM},
355     {"BINARY",                  BINARY},
356     {"",                        -1}
357 };
358 #if defined(DEFAULT_CODE_JIS)
359 #define     DEFAULT_ENCODING ISO_2022_JP
360 #elif defined(DEFAULT_CODE_SJIS)
361 #define     DEFAULT_ENCODING SHIFT_JIS
362 #elif defined(DEFAULT_CODE_EUC)
363 #define     DEFAULT_ENCODING EUC_JP
364 #elif defined(DEFAULT_CODE_UTF8)
365 #define     DEFAULT_ENCODING UTF_8
366 #endif
367
368
/*
 * Locale-independent ASCII character classification and conversion macros.
 *
 * Every use of a macro parameter is parenthesized so that an expression
 * argument (for example `a | b`) is treated as a single operand.  The
 * parameters are still evaluated more than once, so never pass an
 * expression with side effects (such as `*p++`).
 */
#define is_alnum(c) \
    (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || ('0' <= (c) && (c) <= '9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isoctal(c)  ('0' <= (c) && (c) <= '7')
#define nkf_isdigit(c)  ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c)  (SP <= (c) && (c) <= '~')
#define nkf_isgraph(c)  ('!' <= (c) && (c) <= '~')
/* Numeric value of a hexadecimal digit character; 0 for any other character. */
#define hex2bin(c) (('0' <= (c) && (c) <= '9') ? ((c) - '0') : \
                    ('A' <= (c) && (c) <= 'F') ? ((c) - 'A' + 10) : \
                    ('a' <= (c) && (c) <= 'f') ? ((c) - 'a' + 10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c) & 15])
/* True when bits 8-15 of c2 (after truncation to unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and any character strictly between SP and DEL
   other than ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in the inclusive range CP932_TABLE_BEGIN..CP932_TABLE_END. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
397
398 #define         HOLD_SIZE       1024
399 #if defined(INT_IS_SHORT)
400 #define         IOBUF_SIZE      2048
401 #else
402 #define         IOBUF_SIZE      16384
403 #endif
404
405 #define         DEFAULT_J       'B'
406 #define         DEFAULT_R       'B'
407
408 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
409 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
410
411 #define         RANGE_NUM_MAX   18
412 #define         GETA1   0x22
413 #define         GETA2   0x2e
414
415
416 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
417 #define sizeof_euc_to_utf8_1byte 94
418 #define sizeof_euc_to_utf8_2bytes 94
419 #define sizeof_utf8_to_euc_C2 64
420 #define sizeof_utf8_to_euc_E5B8 64
421 #define sizeof_utf8_to_euc_2bytes 112
422 #define sizeof_utf8_to_euc_3bytes 16
423 #endif
424
425 /* MIME preprocessor */
426
427 #ifdef EASYWIN /*Easy Win */
428 extern POINT _BufferSize;
429 #endif
430
/* Per-encoding record; the instances are the entries of input_code_list. */
431 struct input_code{
432     char *name;       /* encoding label, e.g. "EUC-JP" or "Shift_JIS" */
433     nkf_char stat;    /* NOTE(review): presumably the state kept by status_func - confirm */
434     nkf_char score;   /* NOTE(review): presumably updated via set_code_score/clr_code_score - confirm */
435     nkf_char index;   /* NOTE(review): presumably the fill level of buf - confirm */
436     nkf_char buf[3];  /* small per-encoding buffer of up to three nkf_char values */
437     void (*status_func)(struct input_code *, nkf_char);  /* callback given this record and one nkf_char; NULL for the UTF-16 and UTF-32 entries */
438     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);  /* input converter associated with this encoding */
439     int _file_stat;   /* initialised to 0 in input_code_list; purpose not visible here */
440 };
441
442 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
443 static int output_encoding = DEFAULT_ENCODING;
444
445 #if !defined(PERL_XS) && !defined(WIN32DLL)
446 static  nkf_char     noconvert(FILE *f);
447 #endif
448 static  void    module_connection(void);
449 static  nkf_char     kanji_convert(FILE *f);
450 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
451 static  nkf_char     push_hold_buf(nkf_char c2);
452 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
453 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
454 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
455 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
456 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
457 /* UCS Mapping
458  * 0: Shift_JIS, eucJP-ascii
459  * 1: eucJP-ms
460  * 2: CP932, CP51932
461  * 3: CP10001
462  */
463 #define UCS_MAP_ASCII   0
464 #define UCS_MAP_MS      1
465 #define UCS_MAP_CP932   2
466 #define UCS_MAP_CP10001 3
467 static int ms_ucs_map_f = UCS_MAP_ASCII;
468 #endif
469 #ifdef UTF8_INPUT_ENABLE
470 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
471 static  int     no_cp932ext_f = FALSE;
472 /* ignore ZERO WIDTH NO-BREAK SPACE */
473 static  int     no_best_fit_chars_f = FALSE;
474 static  int     input_endian = ENDIAN_BIG;
475 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
476 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
477 static  void    encode_fallback_html(nkf_char c);
478 static  void    encode_fallback_xml(nkf_char c);
479 static  void    encode_fallback_java(nkf_char c);
480 static  void    encode_fallback_perl(nkf_char c);
481 static  void    encode_fallback_subchar(nkf_char c);
482 static  void    (*encode_fallback)(nkf_char c) = NULL;
483 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
484 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
485 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
486 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
487 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
488 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
489 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
490 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
491 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
492 static  void    w_status(struct input_code *, nkf_char);
493 #endif
494 #ifdef UTF8_OUTPUT_ENABLE
495 static  int     output_bom_f = FALSE;
496 static  int     output_endian = ENDIAN_BIG;
497 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
498 static  void    w_oconv(nkf_char c2,nkf_char c1);
499 static  void    w_oconv16(nkf_char c2,nkf_char c1);
500 static  void    w_oconv32(nkf_char c2,nkf_char c1);
501 #endif
502 static  void    e_oconv(nkf_char c2,nkf_char c1);
503 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
504 static  void    s_oconv(nkf_char c2,nkf_char c1);
505 static  void    j_oconv(nkf_char c2,nkf_char c1);
506 static  void    fold_conv(nkf_char c2,nkf_char c1);
507 static  void    nl_conv(nkf_char c2,nkf_char c1);
508 static  void    z_conv(nkf_char c2,nkf_char c1);
509 static  void    rot_conv(nkf_char c2,nkf_char c1);
510 static  void    hira_conv(nkf_char c2,nkf_char c1);
511 static  void    base64_conv(nkf_char c2,nkf_char c1);
512 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
513 static  void    no_connection(nkf_char c2,nkf_char c1);
514 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
515
516 static  void    code_score(struct input_code *ptr);
517 static  void    code_status(nkf_char c);
518
519 static  void    std_putc(nkf_char c);
520 static  nkf_char     std_getc(FILE *f);
521 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
522
523 static  nkf_char     broken_getc(FILE *f);
524 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
525
526 static  nkf_char     mime_begin(FILE *f);
527 static  nkf_char     mime_getc(FILE *f);
528 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
529
530 static  void    switch_mime_getc(void);
531 static  void    unswitch_mime_getc(void);
532 static  nkf_char     mime_begin_strict(FILE *f);
533 static  nkf_char     mime_getc_buf(FILE *f);
534 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
535 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
536
537 static  nkf_char     base64decode(nkf_char c);
538 static  void    mime_prechar(nkf_char c2, nkf_char c1);
539 static  void    mime_putc(nkf_char c);
540 static  void    open_mime(nkf_char c);
541 static  void    close_mime(void);
542 static  void    eof_mime(void);
543 static  void    mimeout_addchar(nkf_char c);
544 #ifndef PERL_XS
545 static  void    usage(void);
546 static  void    version(void);
547 static  void    show_configuration(void);
548 #endif
549 static  void    options(unsigned char *c);
550 static  void    reinit(void);
551
552 /* buffers */
553
554 #if !defined(PERL_XS) && !defined(WIN32DLL)
555 static unsigned char   stdibuf[IOBUF_SIZE];
556 static unsigned char   stdobuf[IOBUF_SIZE];
557 #endif
558 static unsigned char   hold_buf[HOLD_SIZE*2];
559 static int             hold_count = 0;
560
561 /* MIME preprocessor fifo */
562
563 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
564 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
565 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
566 static unsigned char           mime_buf[MIME_BUF_SIZE];
567 static unsigned int            mime_top = 0;
568 static unsigned int            mime_last = 0;  /* decoded */
569 static unsigned int            mime_input = 0; /* undecoded */
570 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
571
572 /* flags */
573 static int             unbuf_f = FALSE;
574 static int             estab_f = FALSE;
575 static int             nop_f = FALSE;
576 static int             binmode_f = TRUE;       /* binary mode */
577 static int             rot_f = FALSE;          /* rot14/43 mode */
578 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
579 static int             input_f = FALSE;        /* non fixed input code  */
580 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
581 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
582 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
583 static int             mimebuf_f = FALSE;      /* MIME buffered input */
584 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
585 static int             iso8859_f = FALSE;      /* ISO8859 through */
586 static int             mimeout_f = FALSE;       /* base64 mode */
587 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
588 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
589
590 #ifdef UNICODE_NORMALIZATION
591 static int nfc_f = FALSE;
592 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
593 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
594 static nkf_char nfc_getc(FILE *f);
595 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
596 #endif
597
598 #ifdef INPUT_OPTION
599 static int cap_f = FALSE;
600 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
601 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
602 static nkf_char cap_getc(FILE *f);
603 static nkf_char cap_ungetc(nkf_char c,FILE *f);
604
605 static int url_f = FALSE;
606 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
607 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
608 static nkf_char url_getc(FILE *f);
609 static nkf_char url_ungetc(nkf_char c,FILE *f);
610 #endif
611
612 #if defined(INT_IS_SHORT)
613 #define NKF_INT32_C(n)   (n##L)
614 #else
615 #define NKF_INT32_C(n)   (n)
616 #endif
617 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
618 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
619 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
620 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
621 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
622 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
623 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
624
625 #ifdef NUMCHAR_OPTION
626 static int numchar_f = FALSE;
627 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of numchar_getc */
628 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
629 static nkf_char numchar_getc(FILE *f);
630 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
631 #endif
632
633 #ifdef CHECK_OPTION
634 static int noout_f = FALSE;
635 static void no_putc(nkf_char c);
636 static int debug_f = FALSE;
637 static void debug(const char *str);
638 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
639 #endif
640
641 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
642 #if !defined PERL_XS
643 static  void    print_guessed_code(char *filename);
644 #endif
645 static  void    set_input_codename(char *codename);
646
647 #ifdef EXEC_IO
648 static int exec_f = 0;
649 #endif
650
651 #ifdef SHIFTJIS_CP932
652 /* invert IBM extended characters to others */
653 static int cp51932_f = FALSE;
654
655 /* invert NEC-selected IBM extended characters to IBM extended characters */
656 static int cp932inv_f = TRUE;
657
658 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
659 #endif /* SHIFTJIS_CP932 */
660
661 #ifdef X0212_ENABLE
662 static int x0212_f = FALSE;
663 static nkf_char x0212_shift(nkf_char c);
664 static nkf_char x0212_unshift(nkf_char c);
665 #endif
666 static int x0213_f = FALSE;
667
668 static unsigned char prefix_table[256];
669
670 static void set_code_score(struct input_code *ptr, nkf_char score);
671 static void clr_code_score(struct input_code *ptr, nkf_char score);
672 static void status_disable(struct input_code *ptr);
673 static void status_push_ch(struct input_code *ptr, nkf_char c);
674 static void status_clear(struct input_code *ptr);
675 static void status_reset(struct input_code *ptr);
676 static void status_reinit(struct input_code *ptr);
677 static void status_check(struct input_code *ptr, nkf_char c);
678 static void e_status(struct input_code *, nkf_char);
679 static void s_status(struct input_code *, nkf_char);
680
681 struct input_code input_code_list[] = {
682     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
683     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
684 #ifdef UTF8_INPUT_ENABLE
685     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
686     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
687     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
688 #endif
689     {0}
690 };
691
692 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
693 static int              base64_count = 0;
694
695 /* X0208 -> ASCII converter */
696
697 /* fold parameter */
698 static int             f_line = 0;    /* chars in line */
699 static int             f_prev = 0;
700 static int             fold_preserve_f = FALSE; /* preserve new lines */
701 static int             fold_f  = FALSE;
702 static int             fold_len  = 0;
703
704 /* options */
705 static unsigned char   kanji_intro = DEFAULT_J;
706 static unsigned char   ascii_intro = DEFAULT_R;
707
708 /* Folding */
709
710 #define FOLD_MARGIN  10
711 #define DEFAULT_FOLD 60
712
713 static int             fold_margin  = FOLD_MARGIN;
714
715 /* converters */
716
717 #ifdef DEFAULT_CODE_JIS
718 #   define  DEFAULT_CONV j_oconv
719 #endif
720 #ifdef DEFAULT_CODE_SJIS
721 #   define  DEFAULT_CONV s_oconv
722 #endif
723 #ifdef DEFAULT_CODE_EUC
724 #   define  DEFAULT_CONV e_oconv
725 #endif
726 #ifdef DEFAULT_CODE_UTF8
727 #   define  DEFAULT_CONV w_oconv
728 #endif
729
730 /* process default */
731 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
732
733 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
734 /* s_iconv or oconv */
735 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
736
737 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
738 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
739 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
740 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
741 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
742 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
743 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
744
745 /* static redirections */
746
747 static  void   (*o_putc)(nkf_char c) = std_putc;
748
749 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
750 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
751
752 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc (presumably broken_getc - confirm) */
753 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
754
755 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
756
757 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
758 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
759
760 /* for strict mime */
761 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
762 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
763
764 /* Global states */
765 static int output_mode = ASCII,    /* output kanji mode */
766            input_mode =  ASCII,    /* input kanji mode */
767            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
768 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
769
770 /* X0201 / X0208 conversion tables */
771
772 /* X0201 kana conversion table */
773 /* 90-9F A0-DF */
774 static const unsigned char cv[]= {
775     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
776     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
777     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
778     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
779     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
780     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
781     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
782     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
783     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
784     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
785     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
786     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
787     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
788     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
789     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
790     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
791     0x00,0x00};
792
793
794 /* X0201 kana conversion table for dakuten */
795 /* 90-9F A0-DF */
796 static const unsigned char dv[]= {
797     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
800     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
801     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
802     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
803     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
804     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
805     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
806     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
807     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
808     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
809     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
812     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813     0x00,0x00};
814
815 /* X0201 kana conversion table for han-dakuten */
816 /* 90-9F A0-DF */
817 static const unsigned char ev[]= {
818     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
829     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
830     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
832     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
833     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834     0x00,0x00};
835
836
837 /* X0208 kigou (symbol) conversion table */
838 /* 0x8140 - 0x819e */
/*
 * One byte per entry, 96 entries.  The non-zero values are ASCII codes
 * (for example 0x2c ',', 0x2e '.', 0x3a ':', 0x3f '?'); 0x00 presumably
 * means the symbol has no single-byte equivalent.
 * NOTE(review): the indexing code is outside this excerpt -- confirm how
 * the 0x8140 - 0x819e range above maps onto the array index.
 */
839 static const unsigned char fv[] = {
840
841     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
842     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
843     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
844     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
845     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
846     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
847     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
848     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
849     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
850     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
852     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
853 } ;
854
855
856
/* TRUE when output goes to a file: main() then reopens stdout onto the
 * output file and closes it at exit.  Set by the overwrite/in-place long
 * options in options(). */
857 static int             file_out_f = FALSE;
858 #ifdef OVERWRITE
/* State for --overwrite[=SUFFIX] and --in-place[=SUFFIX] (set in options()):
 *   overwrite_f     - replace each input file with its converted contents
 *   preserve_time_f - copy the original access/modification times (utime)
 *   backup_f        - keep the original under a name built from backup_suffix
 *   backup_suffix   - suffix/pattern handed to get_backup_filename() */
859 static int             overwrite_f = FALSE;
860 static int             preserve_time_f = FALSE;
861 static int             backup_f = FALSE;
862 static char            *backup_suffix = "";
863 static char *get_backup_filename(const char *suffix, const char *filename);
864 #endif
865
866 static int nlmode_f = 0;   /* CR, LF, CRLF */
/* input_newline is reset to 0 by main() before each input file. */
867 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
868 static nkf_char prev_cr = 0; /* CR or 0 */
869 #ifdef EASYWIN /*Easy Win */
/* Target of the scanf() that main() issues before exiting when output is
 * not going to a file. */
870 static int             end_check;
871 #endif /*Easy Win */
872
/* NOTE(review): std_gc_buf / std_gc_ndx are not used inside this excerpt;
 * presumably a push-back buffer and its fill index for the stdin reader --
 * confirm at the use sites. */
873 #define STD_GC_BUFSIZE (256)
874 nkf_char std_gc_buf[STD_GC_BUFSIZE];
875 nkf_char std_gc_ndx;
876
/*
 * Duplicate a NUL-terminated string into freshly malloc'ed storage.
 *
 * Returns the copy on success.  If the allocation fails, the error is
 * reported with perror() (using str as the message prefix) and a pointer
 * to an empty string literal is returned instead; that fallback must be
 * neither modified nor freed.
 */
char* nkf_strcpy(const char *str)
{
    const size_t nbytes = strlen(str) + 1;   /* include the terminator */
    char *copy = malloc(nbytes);

    if (copy != NULL) {
        memcpy(copy, str, nbytes);
        return copy;
    }
    perror(str);
    return "";
}
887
/*
 * Copy str into res, converting each character with nkf_toupper().
 *
 * str    - NUL-terminated source string.
 * res    - destination buffer.
 * length - capacity of res in bytes, including room for the terminator.
 *
 * At most length - 1 characters are converted so that the terminating NUL
 * always lands inside res; longer input is truncated.  (The previous loop
 * allowed `length` characters and then stored the terminator at
 * res[length], one byte past a buffer of that size.)  Nothing is written
 * when length is 0.
 */
static void nkf_str_upcase(const char *str, char *res, size_t length)
{
    size_t i;

    if (length == 0) {
        return;
    }
    for (i = 0; i + 1 < length && str[i]; i++) {
        res[i] = nkf_toupper(str[i]);
    }
    res[i] = '\0';
}
896
897 static int nkf_enc_find_index(const char *name)
898 {
899     int i, index = -1;
900     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
901         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
902             return encoding_name_to_id_table[i].id;
903         }
904     }
905     return index;
906 }
907
908 #if defined(PERL_XS) || defined(WIN32DLL)
909 static char* nkf_enc_name(const int index)
910 {
911     int i;
912     const char* name = "ASCII";
913     for (i = 0; encoding_id_to_name_table[i].id >= 0; i++) {
914         if (encoding_id_to_name_table[i].id == index) {
915             return nkf_strcpy(encoding_id_to_name_table[i].name);
916         }
917     }
918     return nkf_strcpy(name);
919 }
920 #endif
921
922 #ifdef WIN32DLL
923 #include "nkf32dll.c"
924 #elif defined(PERL_XS)
925 #else /* WIN32DLL */
/*
 * Command-line entry point (compiled only when neither WIN32DLL nor PERL_XS
 * is defined).
 *
 *  1. Passes every leading argument that starts with '-' to options().
 *  2. Sets up stdout (binary mode when binmode_f, unbuffered or stdobuf).
 *  3. With no remaining arguments, converts stdin; otherwise opens each
 *     remaining argument as an input file and converts it, optionally
 *     redirecting stdout to an output file (file_out_f) or replacing the
 *     input file in place (overwrite_f).
 *
 * Returns 0 on success.  Returns -1 if any input file could not be opened
 * (after the remaining files have still been processed), or immediately
 * when a temporary/output file cannot be set up.
 */
926 int main(int argc, char **argv)
927 {
928     FILE  *fin;
929     unsigned char  *cp;
930
931     char *outfname = NULL;
932     char *origfname;
933
934 #ifdef EASYWIN /*Easy Win */
935     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
936 #endif
937
    /* Consume leading "-..." arguments; argc/argv end up at the first
     * non-option argument. */
938     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
939         cp = (unsigned char *)*argv;
940         options(cp);
        /* Guess mode: call reinit() but carry the flags saved below across
         * it, and force mime_f off.  (reinit() is defined outside this
         * excerpt; presumably it resets the option state.) */
941         if (guess_f) {
942 #ifdef CHECK_OPTION
943             int debug_f_back = debug_f;
944 #endif
945 #ifdef EXEC_IO
946             int exec_f_back = exec_f;
947 #endif
948 #ifdef X0212_ENABLE
949             int x0212_f_back = x0212_f;
950 #endif
951             int x0213_f_back = x0213_f;
952             int guess_f_back = guess_f;
953             reinit();
954             guess_f = guess_f_back;
955             mime_f = FALSE;
956 #ifdef CHECK_OPTION
957             debug_f = debug_f_back;
958 #endif
959 #ifdef EXEC_IO
960             exec_f = exec_f_back;
961 #endif
962 #ifdef X0212_ENABLE
963             x0212_f = x0212_f_back;
964 #endif
965             x0213_f = x0213_f_back;
966         }
967 #ifdef EXEC_IO
        /* --exec-in (exec_f > 0): run argv[1..] as a child and read its
         * stdout through a pipe as our stdin.
         * --exec-out (exec_f < 0): run argv[1..] as a child and feed our
         * stdout into its stdin. */
968         if (exec_f){
969             int fds[2], pid;
970             if (pipe(fds) < 0 || (pid = fork()) < 0){
971                 abort();
972             }
973             if (pid == 0){
974                 if (exec_f > 0){
975                     close(fds[0]);
976                     dup2(fds[1], 1);
977                 }else{
978                     close(fds[1]);
979                     dup2(fds[0], 0);
980                 }
                /* NOTE(review): if execvp() fails the child falls through
                 * and keeps running the parent's code path below. */
981                 execvp(argv[1], &argv[1]);
982             }
983             if (exec_f > 0){
984                 close(fds[1]);
985                 dup2(fds[0], 0);
986             }else{
987                 close(fds[0]);
988                 dup2(fds[1], 1);
989             }
            /* The remaining arguments belong to the child command, so the
             * parent converts stdin -> stdout only. */
990             argc = 0;
991             break;
992         }
993 #endif
994     }
995
996     if (binmode_f == TRUE)
997 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
998     if (freopen("","wb",stdout) == NULL)
999         return (-1);
1000 #else
1001     setbinmode(stdout);
1002 #endif
1003
1004     if (unbuf_f)
1005       setbuf(stdout, (char *) NULL);
1006     else
1007       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1008
    /* No file arguments: filter stdin to stdout. */
1009     if (argc == 0) {
1010       if (binmode_f == TRUE)
1011 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1012       if (freopen("","rb",stdin) == NULL) return (-1);
1013 #else
1014       setbinmode(stdin);
1015 #endif
1016       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1017       if (nop_f)
1018           noconvert(stdin);
1019       else {
1020           kanji_convert(stdin);
1021           if (guess_f) print_guessed_code(NULL);
1022       }
1023     } else {
      /* nfiles remembers the original count: the guessed code is printed
       * with the file name only when more than one file was given. */
1024       int nfiles = argc;
1025         int is_argument_error = FALSE;
1026       while (argc--) {
            /* Per-file state is reset before each input file. */
1027             input_codename = NULL;
1028             input_newline = 0;
1029 #ifdef CHECK_OPTION
1030             iconv_for_check = 0;
1031 #endif
          /* On open failure: step argv back to print the name, step forward
           * again, remember the error and go on with the next file.
           * NOTE(review): the dereference in "*argv++;" is unused; only the
           * increment has an effect. */
1032           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1033               perror(*--argv);
1034                 *argv++;
1035                 is_argument_error = TRUE;
1036                 continue;
1037           } else {
1038 #ifdef OVERWRITE
1039               int fd = 0;
1040               int fd_backup = 0;
1041 #endif
1042
1043 /* reopen file for stdout */
1044               if (file_out_f == TRUE) {
1045 #ifdef OVERWRITE
                  /* In-place mode: write to a temporary file, which is
                   * renamed over the original after conversion. */
1046                   if (overwrite_f){
1047                       outfname = malloc(strlen(origfname)
1048                                         + strlen(".nkftmpXXXXXX")
1049                                         + 1);
1050                       if (!outfname){
1051                           perror(origfname);
1052                           return -1;
1053                       }
1054                       strcpy(outfname, origfname);
1055 #ifdef MSDOS
                      /* MSDOS: keep only the directory part of the path
                       * and create "ntXXXXXX" inside it. */
1056                       {
1057                           int i;
1058                           for (i = strlen(outfname); i; --i){
1059                               if (outfname[i - 1] == '/'
1060                                   || outfname[i - 1] == '\\'){
1061                                   break;
1062                               }
1063                           }
1064                           outfname[i] = '\0';
1065                       }
1066                       strcat(outfname, "ntXXXXXX");
1067                       mktemp(outfname);
1068                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1069                                 S_IREAD | S_IWRITE);
1070 #else
1071                       strcat(outfname, ".nkftmpXXXXXX");
1072                       fd = mkstemp(outfname);
1073 #endif
                      /* Save the current stdout descriptor in fd_backup and
                       * point stdout's descriptor at the temporary file.
                       * NOTE(review): no close(fd_backup) is visible in this
                       * function, so one descriptor appears to leak per
                       * processed file. */
1074                       if (fd < 0
1075                           || (fd_backup = dup(fileno(stdout))) < 0
1076                           || dup2(fd, fileno(stdout)) < 0
1077                           ){
1078                           perror(origfname);
1079                           return -1;
1080                       }
1081                   }else
1082 #endif
                  /* Note: when OVERWRITE is defined, the "else" above binds
                   * to this if/else.  If exactly one argument remains it is
                   * taken as the output file name; otherwise output goes to
                   * "nkf.out". */
1083                   if(argc == 1) {
1084                       outfname = *argv++;
1085                       argc--;
1086                   } else {
1087                       outfname = "nkf.out";
1088                   }
1089
1090                   if(freopen(outfname, "w", stdout) == NULL) {
1091                       perror (outfname);
1092                       return (-1);
1093                   }
1094                   if (binmode_f == TRUE) {
1095 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1096                       if (freopen("","wb",stdout) == NULL)
1097                            return (-1);
1098 #else
1099                       setbinmode(stdout);
1100 #endif
1101                   }
1102               }
1103               if (binmode_f == TRUE)
1104 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1105                  if (freopen("","rb",fin) == NULL)
1106                     return (-1);
1107 #else
1108                  setbinmode(fin);
1109 #endif
1110               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1111               if (nop_f)
1112                   noconvert(fin);
1113               else {
1114                   char *filename = NULL;
1115                   kanji_convert(fin);
1116                   if (nfiles > 1) filename = origfname;
1117                   if (guess_f) print_guessed_code(filename);
1118               }
1119               fclose(fin);
1120 #ifdef OVERWRITE
              /* In-place mode, after conversion: restore stdout, copy the
               * original's mode (and optionally times) onto the temporary
               * file, optionally back up the original, then rename the
               * temporary file over the original. */
1121               if (overwrite_f) {
1122                   struct stat     sb;
1123 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1124                   time_t tb[2];
1125 #else
1126                   struct utimbuf  tb;
1127 #endif
1128
1129                   fflush(stdout);
1130                   close(fd);
1131                   if (dup2(fd_backup, fileno(stdout)) < 0){
1132                       perror("dup2");
1133                   }
                  /* NOTE(review): when stat() fails only a message is
                   * printed; sb is then used uninitialized by chmod() and
                   * utime() below. */
1134                   if (stat(origfname, &sb)) {
1135                       fprintf(stderr, "Can't stat %s\n", origfname);
1136                   }
1137                   /* restore the original file's permissions */
1138                   if (chmod(outfname, sb.st_mode)) {
1139                       fprintf(stderr, "Can't set permission %s\n", outfname);
1140                   }
1141
1142                   /* restore the original file's timestamps */
1143                     if(preserve_time_f){
1144 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1145                         tb[0] = tb[1] = sb.st_mtime;
1146                         if (utime(outfname, tb)) {
1147                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1148                         }
1149 #else
1150                         tb.actime  = sb.st_atime;
1151                         tb.modtime = sb.st_mtime;
1152                         if (utime(outfname, &tb)) {
1153                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1154                         }
1155 #endif
1156                     }
                    /* NOTE(review): get_backup_filename() can return NULL
                     * and its result is passed on unchecked; the returned
                     * buffer is also never freed here. */
1157                     if(backup_f){
1158                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1159 #ifdef MSDOS
1160                         unlink(backup_filename);
1161 #endif
1162                         if (rename(origfname, backup_filename)) {
1163                             perror(backup_filename);
1164                             fprintf(stderr, "Can't rename %s to %s\n",
1165                                     origfname, backup_filename);
1166                         }
1167                     }else{
1168 #ifdef MSDOS
1169                         if (unlink(origfname)){
1170                             perror(origfname);
1171                         }
1172 #endif
1173                     }
1174                   if (rename(outfname, origfname)) {
1175                       perror(origfname);
1176                       fprintf(stderr, "Can't rename %s to %s\n",
1177                               outfname, origfname);
1178                   }
1179                   free(outfname);
1180               }
1181 #endif
1182           }
1183       }
1184         if (is_argument_error)
1185             return(-1);
1186     }
1187 #ifdef EASYWIN /*Easy Win */
    /* EasyWin: when output is not a file, block on scanf() before exiting. */
1188     if (file_out_f == FALSE)
1189         scanf("%d",&end_check);
1190     else
1191         fclose(stdout);
1192 #else /* for Other OS */
1193     if (file_out_f == TRUE)
1194         fclose(stdout);
1195 #endif /*Easy Win */
1196     return (0);
1197 }
1198 #endif /* WIN32DLL */
1199
1200 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from `suffix`.
 *
 * If suffix contains one or more '*', every '*' is replaced by filename and
 * all other characters of suffix are copied unchanged (suffix "old_*" with
 * filename "a.txt" gives "old_a.txt").  Otherwise suffix is simply appended
 * to filename (suffix ".bak" gives "a.txt.bak").
 *
 * Returns a malloc'ed, NUL-terminated string owned by the caller, or NULL
 * (after reporting with perror) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t i, j;
    const size_t suffix_length = strlen(suffix);
    const size_t filename_length = strlen(filename);

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each one-byte '*' expands to filename_length bytes. */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }

        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        /* Room for filename + suffix + terminator (the old code requested
         * a single byte here and then overflowed it). */
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
        backup_filename[filename_length + suffix_length] = '\0';
    }
    return backup_filename;
}
1239 #endif
1240
/*
 * Table of the "--long" options recognised by options().
 *
 *   name  - option text that follows "--".  A trailing '=' marks an option
 *           that takes a value ("--name=value"); options() compares only
 *           the part before '=' and then reads the value after it.
 *   alias - when non-empty, a string of short-option characters that
 *           options() processes in place of the long option.  When empty,
 *           options() handles the option itself by comparing `name`.
 *
 * options() scans the table from the top and uses the first entry that
 * matches.  Entries inside #ifdef blocks exist only when the corresponding
 * feature is compiled in.
 */
1241 static const struct {
1242     const char *name;
1243     const char *alias;
1244 } long_option[] = {
1245     {"ic=", ""},
1246     {"oc=", ""},
1247     {"base64","jMB"},
1248     {"euc","e"},
1249     {"euc-input","E"},
1250     {"fj","jm"},
1251     {"help","v"},
1252     {"jis","j"},
1253     {"jis-input","J"},
1254     {"mac","sLm"},
1255     {"mime","jM"},
1256     {"mime-input","m"},
1257     {"msdos","sLw"},
1258     {"sjis","s"},
1259     {"sjis-input","S"},
1260     {"unix","eLu"},
1261     {"version","V"},
1262     {"windows","sLw"},
1263     {"hiragana","h1"},
1264     {"katakana","h2"},
1265     {"katakana-hiragana","h3"},
1266     {"guess=", ""},
1267     {"guess", "g1"},
1268     {"cp932", ""},
1269     {"no-cp932", ""},
1270 #ifdef X0212_ENABLE
1271     {"x0212", ""},
1272 #endif
1273 #ifdef UTF8_OUTPUT_ENABLE
1274     {"utf8", "w"},
1275     {"utf16", "w16"},
1276     {"ms-ucs-map", ""},
1277     {"fb-skip", ""},
1278     {"fb-html", ""},
1279     {"fb-xml", ""},
1280     {"fb-perl", ""},
1281     {"fb-java", ""},
1282     {"fb-subchar", ""},
1283     {"fb-subchar=", ""},
1284 #endif
1285 #ifdef UTF8_INPUT_ENABLE
1286     {"utf8-input", "W"},
1287     {"utf16-input", "W16"},
1288     {"no-cp932ext", ""},
1289     {"no-best-fit-chars",""},
1290 #endif
1291 #ifdef UNICODE_NORMALIZATION
1292     {"utf8mac-input", ""},
1293 #endif
1294 #ifdef OVERWRITE
1295     {"overwrite", ""},
1296     {"overwrite=", ""},
1297     {"in-place", ""},
1298     {"in-place=", ""},
1299 #endif
1300 #ifdef INPUT_OPTION
1301     {"cap-input", ""},
1302     {"url-input", ""},
1303 #endif
1304 #ifdef NUMCHAR_OPTION
1305     {"numchar-input", ""},
1306 #endif
1307 #ifdef CHECK_OPTION
1308     {"no-output", ""},
1309     {"debug", ""},
1310 #endif
1311 #ifdef SHIFTJIS_CP932
1312     {"cp932inv", ""},
1313 #endif
1314 #ifdef EXEC_IO
1315     {"exec-in", ""},
1316     {"exec-out", ""},
1317 #endif
1318     {"prefix=", ""},
1319 };
1320
/* Set to 1 by options() once it sees a bare "--" (end of options); every
 * later call to options() then returns immediately without parsing. */
1321 static int option_mode = 0;
1322
1323 void options(unsigned char *cp)
1324 {
1325     nkf_char i, j;
1326     unsigned char *p;
1327     unsigned char *cp_back = NULL;
1328     char codeset[32];
1329
1330     if (option_mode==1)
1331         return;
1332     while(*cp && *cp++!='-');
1333     while (*cp || cp_back) {
1334         if(!*cp){
1335             cp = cp_back;
1336             cp_back = NULL;
1337             continue;
1338         }
1339         p = 0;
1340         switch (*cp++) {
1341         case '-':  /* literal options */
1342             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1343                 option_mode = 1;
1344                 return;
1345             }
1346             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1347                 p = (unsigned char *)long_option[i].name;
1348                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1349                 if (*p == cp[j] || cp[j] == SP){
1350                     p = &cp[j] + 1;
1351                     break;
1352                 }
1353                 p = 0;
1354             }
1355             if (p == 0) {
1356                 fprintf(stderr, "unknown long option: --%s\n", cp);
1357                 return;
1358             }
1359             while(*cp && *cp != SP && cp++);
1360             if (long_option[i].alias[0]){
1361                 cp_back = cp;
1362                 cp = (unsigned char *)long_option[i].alias;
1363             }else{
1364                 if (strcmp(long_option[i].name, "ic=") == 0){
1365                     nkf_str_upcase(p, codeset, 32);
1366                     i = nkf_enc_find_index(codeset);
1367                     switch (i) {
1368                     case ISO_2022_JP:
1369                         input_f = JIS_INPUT;
1370                         break;
1371                     case CP50220:
1372                     case CP50221:
1373                     case CP50222:
1374                         input_f = JIS_INPUT;
1375 #ifdef SHIFTJIS_CP932
1376                         cp51932_f = TRUE;
1377 #endif
1378 #ifdef UTF8_OUTPUT_ENABLE
1379                         ms_ucs_map_f = UCS_MAP_CP932;
1380 #endif
1381                         break;
1382                     case ISO_2022_JP_1:
1383                         input_f = JIS_INPUT;
1384 #ifdef X0212_ENABLE
1385                         x0212_f = TRUE;
1386 #endif
1387                         break;
1388                     case ISO_2022_JP_3:
1389                         input_f = JIS_INPUT;
1390 #ifdef X0212_ENABLE
1391                         x0212_f = TRUE;
1392 #endif
1393                         x0213_f = TRUE;
1394                         break;
1395                     case SHIFT_JIS:
1396                         input_f = SJIS_INPUT;
1397                         break;
1398                     case WINDOWS_31J:
1399                         input_f = SJIS_INPUT;
1400 #ifdef SHIFTJIS_CP932
1401                         cp51932_f = TRUE;
1402 #endif
1403 #ifdef UTF8_OUTPUT_ENABLE
1404                         ms_ucs_map_f = UCS_MAP_CP932;
1405 #endif
1406                         break;
1407                     case CP10001:
1408                         input_f = SJIS_INPUT;
1409 #ifdef SHIFTJIS_CP932
1410                         cp51932_f = TRUE;
1411 #endif
1412 #ifdef UTF8_OUTPUT_ENABLE
1413                         ms_ucs_map_f = UCS_MAP_CP10001;
1414 #endif
1415                         break;
1416                     case EUC_JP:
1417                         input_f = EUC_INPUT;
1418                         break;
1419                     case CP51932:
1420                         input_f = EUC_INPUT;
1421 #ifdef SHIFTJIS_CP932
1422                         cp51932_f = TRUE;
1423 #endif
1424 #ifdef UTF8_OUTPUT_ENABLE
1425                         ms_ucs_map_f = UCS_MAP_CP932;
1426 #endif
1427                         break;
1428                     case EUCJP_MS:
1429                         input_f = EUC_INPUT;
1430 #ifdef SHIFTJIS_CP932
1431                         cp51932_f = FALSE;
1432 #endif
1433 #ifdef UTF8_OUTPUT_ENABLE
1434                         ms_ucs_map_f = UCS_MAP_MS;
1435 #endif
1436                         break;
1437                     case EUCJP_ASCII:
1438                         input_f = EUC_INPUT;
1439 #ifdef SHIFTJIS_CP932
1440                         cp51932_f = FALSE;
1441 #endif
1442 #ifdef UTF8_OUTPUT_ENABLE
1443                         ms_ucs_map_f = UCS_MAP_ASCII;
1444 #endif
1445                         break;
1446                     case SHIFT_JISX0213:
1447                     case SHIFT_JIS_2004:
1448                         input_f = SJIS_INPUT;
1449                         x0213_f = TRUE;
1450 #ifdef SHIFTJIS_CP932
1451                         cp51932_f = FALSE;
1452 #endif
1453                         break;
1454                     case EUC_JISX0213:
1455                     case EUC_JIS_2004:
1456                         input_f = EUC_INPUT;
1457                         x0213_f = TRUE;
1458 #ifdef SHIFTJIS_CP932
1459                         cp51932_f = FALSE;
1460 #endif
1461                         break;
1462 #ifdef UTF8_INPUT_ENABLE
1463                     case UTF_8:
1464                     case UTF_8N:
1465                     case UTF_8_BOM:
1466                         input_f = UTF8_INPUT;
1467                         break;
1468 #ifdef UNICODE_NORMALIZATION
1469                     case UTF8_MAC:
1470                         input_f = UTF8_INPUT;
1471                         nfc_f = TRUE;
1472                         break;
1473 #endif
1474                     case UTF_16:
1475                     case UTF_16BE:
1476                     case UTF_16BE_BOM:
1477                         input_f = UTF16_INPUT;
1478                         input_endian = ENDIAN_BIG;
1479                         break;
1480                     case UTF_16LE:
1481                     case UTF_16LE_BOM:
1482                         input_f = UTF16_INPUT;
1483                         input_endian = ENDIAN_LITTLE;
1484                         break;
1485                     case UTF_32:
1486                     case UTF_32BE:
1487                     case UTF_32BE_BOM:
1488                         input_f = UTF32_INPUT;
1489                         input_endian = ENDIAN_BIG;
1490                         break;
1491                     case UTF_32LE:
1492                     case UTF_32LE_BOM:
1493                         input_f = UTF32_INPUT;
1494                         input_endian = ENDIAN_LITTLE;
1495                         break;
1496 #endif
1497                     default:
1498                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1499                         break;
1500                     }
1501                     continue;
1502                 }
1503                 if (strcmp(long_option[i].name, "oc=") == 0){
1504                     nkf_str_upcase(p, codeset, 32);
1505                     output_encoding = nkf_enc_find_index(codeset);
1506                     x0201_f = FALSE;
1507                     switch (output_encoding) {
1508                     case ISO_2022_JP:
1509                         output_conv = j_oconv;
1510                         break;
1511                     case CP50220:
1512                             output_conv = j_oconv;
1513                             x0201_f = TRUE;
1514 #ifdef SHIFTJIS_CP932
1515                             cp932inv_f = FALSE;
1516 #endif
1517 #ifdef UTF8_OUTPUT_ENABLE
1518                             ms_ucs_map_f = UCS_MAP_CP932;
1519 #endif
1520                         break;
1521                     case CP50221:
1522                         output_conv = j_oconv;
1523 #ifdef SHIFTJIS_CP932
1524                         cp932inv_f = FALSE;
1525 #endif
1526 #ifdef UTF8_OUTPUT_ENABLE
1527                         ms_ucs_map_f = UCS_MAP_CP932;
1528 #endif
1529                         break;
1530                     case ISO_2022_JP_1:
1531                         output_conv = j_oconv;
1532 #ifdef X0212_ENABLE
1533                         x0212_f = TRUE;
1534 #endif
1535 #ifdef SHIFTJIS_CP932
1536                         cp932inv_f = FALSE;
1537 #endif
1538                         break;
1539                     case ISO_2022_JP_3:
1540                         output_conv = j_oconv;
1541 #ifdef X0212_ENABLE
1542                         x0212_f = TRUE;
1543 #endif
1544                         x0213_f = TRUE;
1545 #ifdef SHIFTJIS_CP932
1546                         cp932inv_f = FALSE;
1547 #endif
1548                         break;
1549                     case SHIFT_JIS:
1550                         output_conv = s_oconv;
1551                         break;
1552                     case WINDOWS_31J:
1553                         output_conv = s_oconv;
1554 #ifdef UTF8_OUTPUT_ENABLE
1555                         ms_ucs_map_f = UCS_MAP_CP932;
1556 #endif
1557                         break;
1558                     case CP10001:
1559                         output_conv = s_oconv;
1560 #ifdef UTF8_OUTPUT_ENABLE
1561                         ms_ucs_map_f = UCS_MAP_CP10001;
1562 #endif
1563                         break;
1564                     case EUC_JP:
1565                         output_conv = e_oconv;
1566                         break;
1567                     case CP51932:
1568                         output_conv = e_oconv;
1569 #ifdef SHIFTJIS_CP932
1570                         cp932inv_f = FALSE;
1571 #endif
1572 #ifdef UTF8_OUTPUT_ENABLE
1573                         ms_ucs_map_f = UCS_MAP_CP932;
1574 #endif
1575                         break;
1576                     case EUCJP_MS:
1577                         output_conv = e_oconv;
1578 #ifdef X0212_ENABLE
1579                         x0212_f = TRUE;
1580 #endif
1581 #ifdef UTF8_OUTPUT_ENABLE
1582                         ms_ucs_map_f = UCS_MAP_MS;
1583 #endif
1584                         break;
1585                     case EUCJP_ASCII:
1586                         output_conv = e_oconv;
1587 #ifdef X0212_ENABLE
1588                         x0212_f = TRUE;
1589 #endif
1590 #ifdef UTF8_OUTPUT_ENABLE
1591                         ms_ucs_map_f = UCS_MAP_ASCII;
1592 #endif
1593                         break;
1594                     case SHIFT_JISX0213:
1595                     case SHIFT_JIS_2004:
1596                             output_conv = s_oconv;
1597                             x0213_f = TRUE;
1598 #ifdef SHIFTJIS_CP932
1599                             cp932inv_f = FALSE;
1600 #endif
1601                         break;
1602                     case EUC_JISX0213:
1603                     case EUC_JIS_2004:
1604                         output_conv = e_oconv;
1605 #ifdef X0212_ENABLE
1606                         x0212_f = TRUE;
1607 #endif
1608                         x0213_f = TRUE;
1609 #ifdef SHIFTJIS_CP932
1610                         cp932inv_f = FALSE;
1611 #endif
1612                         break;
1613 #ifdef UTF8_OUTPUT_ENABLE
1614                     case UTF_8:
1615                     case UTF_8N:
1616                         output_conv = w_oconv;
1617                         break;
1618                     case UTF_8_BOM:
1619                         output_conv = w_oconv;
1620                         output_bom_f = TRUE;
1621                         break;
1622                     case UTF_16BE:
1623                         output_conv = w_oconv16;
1624                         break;
1625                     case UTF_16:
1626                     case UTF_16BE_BOM:
1627                         output_conv = w_oconv16;
1628                         output_bom_f = TRUE;
1629                         break;
1630                     case UTF_16LE:
1631                         output_conv = w_oconv16;
1632                         output_endian = ENDIAN_LITTLE;
1633                         break;
1634                     case UTF_16LE_BOM:
1635                         output_conv = w_oconv16;
1636                         output_endian = ENDIAN_LITTLE;
1637                         output_bom_f = TRUE;
1638                         break;
1639                     case UTF_32:
1640                     case UTF_32BE:
1641                         output_conv = w_oconv32;
1642                         break;
1643                     case UTF_32BE_BOM:
1644                         output_conv = w_oconv32;
1645                         output_bom_f = TRUE;
1646                         break;
1647                     case UTF_32LE:
1648                         output_conv = w_oconv32;
1649                         output_endian = ENDIAN_LITTLE;
1650                         break;
1651                     case UTF_32LE_BOM:
1652                         output_conv = w_oconv32;
1653                         output_endian = ENDIAN_LITTLE;
1654                         output_bom_f = TRUE;
1655                         break;
1656 #endif
1657                     default:
1658                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1659                         break;
1660                     }
1661                     continue;
1662                 }
1663                 if (strcmp(long_option[i].name, "guess=") == 0){
1664                     if (p[0] == '1') {
1665                         guess_f = 2;
1666                     } else {
1667                         guess_f = 1;
1668                     }
1669                     continue;
1670                 }
1671 #ifdef OVERWRITE
1672                 if (strcmp(long_option[i].name, "overwrite") == 0){
1673                     file_out_f = TRUE;
1674                     overwrite_f = TRUE;
1675                     preserve_time_f = TRUE;
1676                     continue;
1677                 }
1678                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1679                     file_out_f = TRUE;
1680                     overwrite_f = TRUE;
1681                     preserve_time_f = TRUE;
1682                     backup_f = TRUE;
1683                     backup_suffix = malloc(strlen((char *) p) + 1);
1684                     strcpy(backup_suffix, (char *) p);
1685                     continue;
1686                 }
1687                 if (strcmp(long_option[i].name, "in-place") == 0){
1688                     file_out_f = TRUE;
1689                     overwrite_f = TRUE;
1690                     preserve_time_f = FALSE;
1691                     continue;
1692                 }
1693                 if (strcmp(long_option[i].name, "in-place=") == 0){
1694                     file_out_f = TRUE;
1695                     overwrite_f = TRUE;
1696                     preserve_time_f = FALSE;
1697                     backup_f = TRUE;
1698                     backup_suffix = malloc(strlen((char *) p) + 1);
1699                     strcpy(backup_suffix, (char *) p);
1700                     continue;
1701                 }
1702 #endif
1703 #ifdef INPUT_OPTION
1704                 if (strcmp(long_option[i].name, "cap-input") == 0){
1705                     cap_f = TRUE;
1706                     continue;
1707                 }
1708                 if (strcmp(long_option[i].name, "url-input") == 0){
1709                     url_f = TRUE;
1710                     continue;
1711                 }
1712 #endif
1713 #ifdef NUMCHAR_OPTION
1714                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1715                     numchar_f = TRUE;
1716                     continue;
1717                 }
1718 #endif
1719 #ifdef CHECK_OPTION
1720                 if (strcmp(long_option[i].name, "no-output") == 0){
1721                     noout_f = TRUE;
1722                     continue;
1723                 }
1724                 if (strcmp(long_option[i].name, "debug") == 0){
1725                     debug_f = TRUE;
1726                     continue;
1727                 }
1728 #endif
1729                 if (strcmp(long_option[i].name, "cp932") == 0){
1730 #ifdef SHIFTJIS_CP932
1731                     cp51932_f = TRUE;
1732                     cp932inv_f = TRUE;
1733 #endif
1734 #ifdef UTF8_OUTPUT_ENABLE
1735                     ms_ucs_map_f = UCS_MAP_CP932;
1736 #endif
1737                     continue;
1738                 }
1739                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1740 #ifdef SHIFTJIS_CP932
1741                     cp51932_f = FALSE;
1742                     cp932inv_f = FALSE;
1743 #endif
1744 #ifdef UTF8_OUTPUT_ENABLE
1745                     ms_ucs_map_f = UCS_MAP_ASCII;
1746 #endif
1747                     continue;
1748                 }
1749 #ifdef SHIFTJIS_CP932
1750                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1751                     cp932inv_f = TRUE;
1752                     continue;
1753                 }
1754 #endif
1755
1756 #ifdef X0212_ENABLE
1757                 if (strcmp(long_option[i].name, "x0212") == 0){
1758                     x0212_f = TRUE;
1759                     continue;
1760                 }
1761 #endif
1762
1763 #ifdef EXEC_IO
1764                   if (strcmp(long_option[i].name, "exec-in") == 0){
1765                       exec_f = 1;
1766                       return;
1767                   }
1768                   if (strcmp(long_option[i].name, "exec-out") == 0){
1769                       exec_f = -1;
1770                       return;
1771                   }
1772 #endif
1773 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1774                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1775                     no_cp932ext_f = TRUE;
1776                     continue;
1777                 }
1778                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1779                     no_best_fit_chars_f = TRUE;
1780                     continue;
1781                 }
1782                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1783                     encode_fallback = NULL;
1784                     continue;
1785                 }
1786                 if (strcmp(long_option[i].name, "fb-html") == 0){
1787                     encode_fallback = encode_fallback_html;
1788                     continue;
1789                 }
1790                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1791                     encode_fallback = encode_fallback_xml;
1792                     continue;
1793                 }
1794                 if (strcmp(long_option[i].name, "fb-java") == 0){
1795                     encode_fallback = encode_fallback_java;
1796                     continue;
1797                 }
1798                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1799                     encode_fallback = encode_fallback_perl;
1800                     continue;
1801                 }
1802                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1803                     encode_fallback = encode_fallback_subchar;
1804                     continue;
1805                 }
1806                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1807                     encode_fallback = encode_fallback_subchar;
1808                     unicode_subchar = 0;
1809                     if (p[0] != '0'){
1810                         /* decimal number */
1811                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1812                             unicode_subchar *= 10;
1813                             unicode_subchar += hex2bin(p[i]);
1814                         }
1815                     }else if(p[1] == 'x' || p[1] == 'X'){
1816                         /* hexadecimal number */
1817                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1818                             unicode_subchar <<= 4;
1819                             unicode_subchar |= hex2bin(p[i]);
1820                         }
1821                     }else{
1822                         /* octal number */
1823                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1824                             unicode_subchar *= 8;
1825                             unicode_subchar += hex2bin(p[i]);
1826                         }
1827                     }
1828                     w16e_conv(unicode_subchar, &i, &j);
1829                     unicode_subchar = i<<8 | j;
1830                     continue;
1831                 }
1832 #endif
1833 #ifdef UTF8_OUTPUT_ENABLE
1834                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1835                     ms_ucs_map_f = UCS_MAP_MS;
1836                     continue;
1837                 }
1838 #endif
1839 #ifdef UNICODE_NORMALIZATION
1840                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1841                     input_f = UTF8_INPUT;
1842                     nfc_f = TRUE;
1843                     continue;
1844                 }
1845 #endif
1846                 if (strcmp(long_option[i].name, "prefix=") == 0){
1847                     if (nkf_isgraph(p[0])){
1848                         for (i = 1; nkf_isgraph(p[i]); i++){
1849                             prefix_table[p[i]] = p[0];
1850                         }
1851                     }
1852                     continue;
1853                 }
1854             }
1855             continue;
1856         case 'b':           /* buffered mode */
1857             unbuf_f = FALSE;
1858             continue;
1859         case 'u':           /* non bufferd mode */
1860             unbuf_f = TRUE;
1861             continue;
1862         case 't':           /* transparent mode */
1863             if (*cp=='1') {
1864                 /* alias of -t */
1865                 nop_f = TRUE;
1866                 *cp++;
1867             } else if (*cp=='2') {
1868                 /*
1869                  * -t with put/get
1870                  *
1871                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1872                  *
1873                  */
1874                 nop_f = 2;
1875                 *cp++;
1876             } else
1877                 nop_f = TRUE;
1878             continue;
1879         case 'j':           /* JIS output */
1880         case 'n':
1881             output_conv = j_oconv;
1882             output_encoding = ISO_2022_JP;
1883             continue;
1884         case 'e':           /* AT&T EUC output */
1885             output_conv = e_oconv;
1886             cp932inv_f = FALSE;
1887             output_encoding = EUC_JP;
1888             continue;
1889         case 's':           /* SJIS output */
1890             output_conv = s_oconv;
1891             output_encoding = SHIFT_JIS;
1892             continue;
1893         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1894             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1895             input_f = LATIN1_INPUT;
1896             continue;
1897         case 'i':           /* Kanji IN ESC-$-@/B */
1898             if (*cp=='@'||*cp=='B')
1899                 kanji_intro = *cp++;
1900             continue;
1901         case 'o':           /* ASCII IN ESC-(-J/B */
1902             if (*cp=='J'||*cp=='B'||*cp=='H')
1903                 ascii_intro = *cp++;
1904             continue;
1905         case 'h':
1906             /*
1907                 bit:1   katakana->hiragana
1908                 bit:2   hiragana->katakana
1909             */
1910             if ('9'>= *cp && *cp>='0')
1911                 hira_f |= (*cp++ -'0');
1912             else
1913                 hira_f |= 1;
1914             continue;
1915         case 'r':
1916             rot_f = TRUE;
1917             continue;
1918 #if defined(MSDOS) || defined(__OS2__)
1919         case 'T':
1920             binmode_f = FALSE;
1921             continue;
1922 #endif
1923 #ifndef PERL_XS
1924         case 'V':
1925             show_configuration();
1926             exit(1);
1927             break;
1928         case 'v':
1929             usage();
1930             exit(1);
1931             break;
1932 #endif
1933 #ifdef UTF8_OUTPUT_ENABLE
1934         case 'w':           /* UTF-8 output */
1935             if (cp[0] == '8') {
1936                 output_conv = w_oconv; cp++;
1937                 if (cp[0] == '0'){
1938                     cp++;
1939                     output_encoding = UTF_8N;
1940                 } else {
1941                     output_bom_f = TRUE;
1942                     output_encoding = UTF_8_BOM;
1943                 }
1944             } else {
1945                 if ('1'== cp[0] && '6'==cp[1]) {
1946                     output_conv = w_oconv16; cp+=2;
1947                     output_encoding = UTF_16;
1948                 } else if ('3'== cp[0] && '2'==cp[1]) {
1949                     output_conv = w_oconv32; cp+=2;
1950                     output_encoding = UTF_32;
1951                 } else {
1952                     output_conv = w_oconv;
1953                     output_encoding = UTF_8;
1954                     continue;
1955                 }
1956                 if (cp[0]=='L') {
1957                     cp++;
1958                     output_endian = ENDIAN_LITTLE;
1959                 } else if (cp[0] == 'B') {
1960                     cp++;
1961                 } else {
1962                     continue;
1963                 }
1964                 if (cp[0] == '0'){
1965                     cp++;
1966                     output_encoding = output_encoding == UTF_16
1967                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1968                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1969                 } else {
1970                     output_bom_f = TRUE;
1971                     output_encoding = output_encoding == UTF_16
1972                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1973                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1974                 }
1975             }
1976             continue;
1977 #endif
1978 #ifdef UTF8_INPUT_ENABLE
1979         case 'W':           /* UTF input */
1980             if (cp[0] == '8') {
1981                 cp++;
1982                 input_f = UTF8_INPUT;
1983             }else{
1984                 if ('1'== cp[0] && '6'==cp[1]) {
1985                     cp += 2;
1986                     input_f = UTF16_INPUT;
1987                     input_endian = ENDIAN_BIG;
1988                 } else if ('3'== cp[0] && '2'==cp[1]) {
1989                     cp += 2;
1990                     input_f = UTF32_INPUT;
1991                     input_endian = ENDIAN_BIG;
1992                 } else {
1993                     input_f = UTF8_INPUT;
1994                     continue;
1995                 }
1996                 if (cp[0]=='L') {
1997                     cp++;
1998                     input_endian = ENDIAN_LITTLE;
1999                 } else if (cp[0] == 'B') {
2000                     cp++;
2001                 }
2002             }
2003             continue;
2004 #endif
2005         /* Input code assumption */
2006         case 'J':   /* JIS input */
2007             input_f = JIS_INPUT;
2008             continue;
2009         case 'E':   /* AT&T EUC input */
2010             input_f = EUC_INPUT;
2011             continue;
2012         case 'S':   /* MS Kanji input */
2013             input_f = SJIS_INPUT;
2014             continue;
2015         case 'Z':   /* Convert X0208 alphabet to asii */
2016             /* alpha_f
2017                bit:0   Convert JIS X 0208 Alphabet to ASCII
2018                bit:1   Convert Kankaku to one space
2019                bit:2   Convert Kankaku to two spaces
2020                bit:3   Convert HTML Entity
2021                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2022             */
2023             while ('0'<= *cp && *cp <='9') {
2024                 alpha_f |= 1 << (*cp++ - '0');
2025             }
2026             if (!alpha_f) alpha_f = 1;
2027             continue;
2028         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2029             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2030             /* accept  X0201
2031                     ESC-(-I     in JIS, EUC, MS Kanji
2032                     SI/SO       in JIS, EUC, MS Kanji
2033                     SSO         in EUC, JIS, not in MS Kanji
2034                     MS Kanji (0xa0-0xdf)
2035                output  X0201
2036                     ESC-(-I     in JIS (0x20-0x5f)
2037                     SSO         in EUC (0xa0-0xdf)
2038                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2039             */
2040             continue;
2041         case 'X':   /* Convert X0201 kana to X0208 */
2042             x0201_f = TRUE;
2043             continue;
2044         case 'F':   /* prserve new lines */
2045             fold_preserve_f = TRUE;
2046         case 'f':   /* folding -f60 or -f */
2047             fold_f = TRUE;
2048             fold_len = 0;
2049             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2050                 fold_len *= 10;
2051                 fold_len += *cp++ - '0';
2052             }
2053             if (!(0<fold_len && fold_len<BUFSIZ))
2054                 fold_len = DEFAULT_FOLD;
2055             if (*cp=='-') {
2056                 fold_margin = 0;
2057                 cp++;
2058                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2059                     fold_margin *= 10;
2060                     fold_margin += *cp++ - '0';
2061                 }
2062             }
2063             continue;
2064         case 'm':   /* MIME support */
2065             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2066             if (*cp=='B'||*cp=='Q') {
2067                 mime_decode_mode = *cp++;
2068                 mimebuf_f = FIXED_MIME;
2069             } else if (*cp=='N') {
2070                 mime_f = TRUE; cp++;
2071             } else if (*cp=='S') {
2072                 mime_f = STRICT_MIME; cp++;
2073             } else if (*cp=='0') {
2074                 mime_decode_f = FALSE;
2075                 mime_f = FALSE; cp++;
2076             }
2077             continue;
2078         case 'M':   /* MIME output */
2079             if (*cp=='B') {
2080                 mimeout_mode = 'B';
2081                 mimeout_f = FIXED_MIME; cp++;
2082             } else if (*cp=='Q') {
2083                 mimeout_mode = 'Q';
2084                 mimeout_f = FIXED_MIME; cp++;
2085             } else {
2086                 mimeout_f = TRUE;
2087             }
2088             continue;
2089         case 'B':   /* Broken JIS support */
2090             /*  bit:0   no ESC JIS
2091                 bit:1   allow any x on ESC-(-x or ESC-$-x
2092                 bit:2   reset to ascii on NL
2093             */
2094             if ('9'>= *cp && *cp>='0')
2095                 broken_f |= 1<<(*cp++ -'0');
2096             else
2097                 broken_f |= TRUE;
2098             continue;
2099 #ifndef PERL_XS
2100         case 'O':/* for Output file */
2101             file_out_f = TRUE;
2102             continue;
2103 #endif
2104         case 'c':/* add cr code */
2105             nlmode_f = CRLF;
2106             continue;
2107         case 'd':/* delete cr code */
2108             nlmode_f = LF;
2109             continue;
2110         case 'I':   /* ISO-2022-JP output */
2111             iso2022jp_f = TRUE;
2112             continue;
2113         case 'L':  /* line mode */
2114             if (*cp=='u') {         /* unix */
2115                 nlmode_f = LF; cp++;
2116             } else if (*cp=='m') { /* mac */
2117                 nlmode_f = CR; cp++;
2118             } else if (*cp=='w') { /* windows */
2119                 nlmode_f = CRLF; cp++;
2120             } else if (*cp=='0') { /* no conversion  */
2121                 nlmode_f = 0; cp++;
2122             }
2123             continue;
2124 #ifndef PERL_XS
2125         case 'g':
2126             if (*cp == '1') {
2127                 guess_f = 2;
2128                 cp++;
2129             } else if (*cp == '0') {
2130                 guess_f = 1;
2131                 cp++;
2132             } else {
2133                 guess_f = 1;
2134             }
2135             continue;
2136 #endif
2137         case SP:
2138         /* module muliple options in a string are allowed for Perl moudle  */
2139             while(*cp && *cp++!='-');
2140             continue;
2141         default:
2142             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2143             /* bogus option but ignored */
2144             continue;
2145         }
2146     }
2147 }
2148
2149 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2150 {
2151     if (iconv_func){
2152         struct input_code *p = input_code_list;
2153         while (p->name){
2154             if (iconv_func == p->iconv_func){
2155                 return p;
2156             }
2157             p++;
2158         }
2159     }
2160     return 0;
2161 }
2162
2163 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2164 {
2165 #ifdef INPUT_CODE_FIX
2166     if (f || !input_f)
2167 #endif
2168         if (estab_f != f){
2169             estab_f = f;
2170         }
2171
2172     if (iconv_func
2173 #ifdef INPUT_CODE_FIX
2174         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
2175 #endif
2176         ){
2177         iconv = iconv_func;
2178     }
2179 #ifdef CHECK_OPTION
2180     if (estab_f && iconv_for_check != iconv){
2181         struct input_code *p = find_inputcode_byfunc(iconv);
2182         if (p){
2183             set_input_codename(p->name);
2184             debug(p->name);
2185         }
2186         iconv_for_check = iconv;
2187     }
2188 #endif
2189 }
2190
/*
 * Flag bits that set_code_score() ORs into input_code.score.
 * Every flag occupies its own bit, so several can be combined.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* character does not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* score assigned by status_reset() */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score for a first byte c2 with (c2 & 0x70) == 0x20, indexed by the low
 * nibble of c2 (see code_score()).
 *
 * The element type is unsigned char: SCORE_ERROR is 128, which does not
 * fit in a signed char and would otherwise turn negative and sign-extend
 * when OR-ed into the score.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score for a first byte c2 with (c2 & 0x70) == 0x70, indexed by the low
 * nibble of c2 (see code_score()).
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2215
2216 void set_code_score(struct input_code *ptr, nkf_char score)
2217 {
2218     if (ptr){
2219         ptr->score |= score;
2220     }
2221 }
2222
2223 void clr_code_score(struct input_code *ptr, nkf_char score)
2224 {
2225     if (ptr){
2226         ptr->score &= ~score;
2227     }
2228 }
2229
2230 void code_score(struct input_code *ptr)
2231 {
2232     nkf_char c2 = ptr->buf[0];
2233 #ifdef UTF8_OUTPUT_ENABLE
2234     nkf_char c1 = ptr->buf[1];
2235 #endif
2236     if (c2 < 0){
2237         set_code_score(ptr, SCORE_ERROR);
2238     }else if (c2 == SSO){
2239         set_code_score(ptr, SCORE_KANA);
2240     }else if (c2 == 0x8f){
2241         set_code_score(ptr, SCORE_X0212);
2242 #ifdef UTF8_OUTPUT_ENABLE
2243     }else if (!e2w_conv(c2, c1)){
2244         set_code_score(ptr, SCORE_NO_EXIST);
2245 #endif
2246     }else if ((c2 & 0x70) == 0x20){
2247         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2248     }else if ((c2 & 0x70) == 0x70){
2249         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2250     }else if ((c2 & 0x70) >= 0x50){
2251         set_code_score(ptr, SCORE_L2);
2252     }
2253 }
2254
2255 void status_disable(struct input_code *ptr)
2256 {
2257     ptr->stat = -1;
2258     ptr->buf[0] = -1;
2259     code_score(ptr);
2260     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2261 }
2262
2263 void status_push_ch(struct input_code *ptr, nkf_char c)
2264 {
2265     ptr->buf[ptr->index++] = c;
2266 }
2267
2268 void status_clear(struct input_code *ptr)
2269 {
2270     ptr->stat = 0;
2271     ptr->index = 0;
2272 }
2273
2274 void status_reset(struct input_code *ptr)
2275 {
2276     status_clear(ptr);
2277     ptr->score = SCORE_INIT;
2278 }
2279
2280 void status_reinit(struct input_code *ptr)
2281 {
2282     status_reset(ptr);
2283     ptr->_file_stat = 0;
2284 }
2285
2286 void status_check(struct input_code *ptr, nkf_char c)
2287 {
2288     if (c <= DEL && estab_f){
2289         status_reset(ptr);
2290     }
2291 }
2292
2293 void s_status(struct input_code *ptr, nkf_char c)
2294 {
2295     switch(ptr->stat){
2296       case -1:
2297           status_check(ptr, c);
2298           break;
2299       case 0:
2300           if (c <= DEL){
2301               break;
2302 #ifdef NUMCHAR_OPTION
2303           }else if (is_unicode_capsule(c)){
2304               break;
2305 #endif
2306           }else if (0xa1 <= c && c <= 0xdf){
2307               status_push_ch(ptr, SSO);
2308               status_push_ch(ptr, c);
2309               code_score(ptr);
2310               status_clear(ptr);
2311           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2312               ptr->stat = 1;
2313               status_push_ch(ptr, c);
2314           }else if (0xed <= c && c <= 0xee){
2315               ptr->stat = 3;
2316               status_push_ch(ptr, c);
2317 #ifdef SHIFTJIS_CP932
2318           }else if (is_ibmext_in_sjis(c)){
2319               ptr->stat = 2;
2320               status_push_ch(ptr, c);
2321 #endif /* SHIFTJIS_CP932 */
2322 #ifdef X0212_ENABLE
2323           }else if (0xf0 <= c && c <= 0xfc){
2324               ptr->stat = 1;
2325               status_push_ch(ptr, c);
2326 #endif /* X0212_ENABLE */
2327           }else{
2328               status_disable(ptr);
2329           }
2330           break;
2331       case 1:
2332           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2333               status_push_ch(ptr, c);
2334               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2335               code_score(ptr);
2336               status_clear(ptr);
2337           }else{
2338               status_disable(ptr);
2339           }
2340           break;
2341       case 2:
2342 #ifdef SHIFTJIS_CP932
2343         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2344             status_push_ch(ptr, c);
2345             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2346                 set_code_score(ptr, SCORE_CP932);
2347                 status_clear(ptr);
2348                 break;
2349             }
2350         }
2351 #endif /* SHIFTJIS_CP932 */
2352         status_disable(ptr);
2353           break;
2354       case 3:
2355           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2356               status_push_ch(ptr, c);
2357               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2358             set_code_score(ptr, SCORE_CP932);
2359             status_clear(ptr);
2360           }else{
2361               status_disable(ptr);
2362           }
2363           break;
2364     }
2365 }
2366
2367 void e_status(struct input_code *ptr, nkf_char c)
2368 {
2369     switch (ptr->stat){
2370       case -1:
2371           status_check(ptr, c);
2372           break;
2373       case 0:
2374           if (c <= DEL){
2375               break;
2376 #ifdef NUMCHAR_OPTION
2377           }else if (is_unicode_capsule(c)){
2378               break;
2379 #endif
2380           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2381               ptr->stat = 1;
2382               status_push_ch(ptr, c);
2383 #ifdef X0212_ENABLE
2384           }else if (0x8f == c){
2385               ptr->stat = 2;
2386               status_push_ch(ptr, c);
2387 #endif /* X0212_ENABLE */
2388           }else{
2389               status_disable(ptr);
2390           }
2391           break;
2392       case 1:
2393           if (0xa1 <= c && c <= 0xfe){
2394               status_push_ch(ptr, c);
2395               code_score(ptr);
2396               status_clear(ptr);
2397           }else{
2398               status_disable(ptr);
2399           }
2400           break;
2401 #ifdef X0212_ENABLE
2402       case 2:
2403           if (0xa1 <= c && c <= 0xfe){
2404               ptr->stat = 1;
2405               status_push_ch(ptr, c);
2406           }else{
2407               status_disable(ptr);
2408           }
2409 #endif /* X0212_ENABLE */
2410     }
2411 }
2412
#ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte c to the state machine kept in ptr.
 * NOTE(review): the w_ prefix, the lead-byte ranges and the EF BB BF
 * check suggest this is the UTF-8 candidate -- confirm against
 * input_code_list.
 *
 * ptr->stat values handled here:
 *   -1  only status_check() is consulted
 *    0  waiting for a first byte
 *    1  first byte 0xc0-0xdf seen
 *    2  first byte 0xe0-0xef seen
 *    3  first byte 0xf0-0xf4 seen
 *
 * In states 1 and 2 the character is converted with w2e_conv() and scored
 * once more than stat bytes are buffered; the sequence EF BB BF is
 * converted but not scored.  In state 3 bytes are only buffered and the
 * state is cleared without scoring.  Any byte outside 0x80-0xbf in states
 * 1-3 disables ptr through status_disable().
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    const int is_trail = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
      case -1:
        status_check(ptr, c);
        break;

      case 0:
        if (c <= DEL)
            break;
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c))
            break;
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
        }else{
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        break;

      case 1:
      case 2:
        if (!is_trail){
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index <= ptr->stat)
            break;              /* sequence not complete yet */
        {
            int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                       && ptr->buf[2] == 0xbf);
            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!bom){
                code_score(ptr);
            }
            status_clear(ptr);
        }
        break;

      case 3:
        if (!is_trail){
            status_disable(ptr);
            break;
        }
        if (ptr->index < ptr->stat){
            status_push_ch(ptr, c);
        }else{
            status_clear(ptr);
        }
        break;
    }
}
#endif
2472
2473 void code_status(nkf_char c)
2474 {
2475     int action_flag = 1;
2476     struct input_code *result = 0;
2477     struct input_code *p = input_code_list;
2478     while (p->name){
2479         if (!p->status_func) {
2480             ++p;
2481             continue;
2482         }
2483         if (!p->status_func)
2484             continue;
2485         (p->status_func)(p, c);
2486         if (p->stat > 0){
2487             action_flag = 0;
2488         }else if(p->stat == 0){
2489             if (result){
2490                 action_flag = 0;
2491             }else{
2492                 result = p;
2493             }
2494         }
2495         ++p;
2496     }
2497
2498     if (action_flag){
2499         if (result && !estab_f){
2500             set_iconv(TRUE, result->iconv_func);
2501         }else if (c <= DEL){
2502             struct input_code *ptr = input_code_list;
2503             while (ptr->name){
2504                 status_reset(ptr);
2505                 ++ptr;
2506             }
2507         }
2508     }
2509 }
2510
2511 #ifndef WIN32DLL
2512 nkf_char std_getc(FILE *f)
2513 {
2514     if (std_gc_ndx){
2515         return std_gc_buf[--std_gc_ndx];
2516     }
2517     return getc(f);
2518 }
2519 #endif /*WIN32DLL*/
2520
2521 nkf_char std_ungetc(nkf_char c, FILE *f)
2522 {
2523     if (std_gc_ndx == STD_GC_BUFSIZE){
2524         return EOF;
2525     }
2526     std_gc_buf[std_gc_ndx++] = c;
2527     return c;
2528 }
2529
2530 #ifndef WIN32DLL
2531 void std_putc(nkf_char c)
2532 {
2533     if(c!=EOF)
2534       putchar(c);
2535 }
2536 #endif /*WIN32DLL*/
2537
2538 #if !defined(PERL_XS) && !defined(WIN32DLL)
2539 nkf_char noconvert(FILE *f)
2540 {
2541     nkf_char    c;
2542
2543     if (nop_f == 2)
2544         module_connection();
2545     while ((c = (*i_getc)(f)) != EOF)
2546       (*o_putc)(c);
2547     (*o_putc)(EOF);
2548     return 1;
2549 }
2550 #endif
2551
2552 void module_connection(void)
2553 {
2554     oconv = output_conv;
2555     o_putc = std_putc;
2556
2557     /* replace continucation module, from output side */
2558
2559     /* output redicrection */
2560 #ifdef CHECK_OPTION
2561     if (noout_f || guess_f){
2562         o_putc = no_putc;
2563     }
2564 #endif
2565     if (mimeout_f) {
2566         o_mputc = o_putc;
2567         o_putc = mime_putc;
2568         if (mimeout_f == TRUE) {
2569             o_base64conv = oconv; oconv = base64_conv;
2570         }
2571         /* base64_count = 0; */
2572     }
2573
2574     if (nlmode_f || guess_f) {
2575         o_nlconv = oconv; oconv = nl_conv;
2576     }
2577     if (rot_f) {
2578         o_rot_conv = oconv; oconv = rot_conv;
2579     }
2580     if (iso2022jp_f) {
2581         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2582     }
2583     if (hira_f) {
2584         o_hira_conv = oconv; oconv = hira_conv;
2585     }
2586     if (fold_f) {
2587         o_fconv = oconv; oconv = fold_conv;
2588         f_line = 0;
2589     }
2590     if (alpha_f || x0201_f) {
2591         o_zconv = oconv; oconv = z_conv;
2592     }
2593
2594     i_getc = std_getc;
2595     i_ungetc = std_ungetc;
2596     /* input redicrection */
2597 #ifdef INPUT_OPTION
2598     if (cap_f){
2599         i_cgetc = i_getc; i_getc = cap_getc;
2600         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2601     }
2602     if (url_f){
2603         i_ugetc = i_getc; i_getc = url_getc;
2604         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2605     }
2606 #endif
2607 #ifdef NUMCHAR_OPTION
2608     if (numchar_f){
2609         i_ngetc = i_getc; i_getc = numchar_getc;
2610         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2611     }
2612 #endif
2613 #ifdef UNICODE_NORMALIZATION
2614     if (nfc_f && input_f == UTF8_INPUT){
2615         i_nfc_getc = i_getc; i_getc = nfc_getc;
2616         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2617     }
2618 #endif
2619     if (mime_f && mimebuf_f==FIXED_MIME) {
2620         i_mgetc = i_getc; i_getc = mime_getc;
2621         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2622     }
2623     if (broken_f & 1) {
2624         i_bgetc = i_getc; i_getc = broken_getc;
2625         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2626     }
2627     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2628         set_iconv(-TRUE, e_iconv);
2629     } else if (input_f == SJIS_INPUT) {
2630         set_iconv(-TRUE, s_iconv);
2631 #ifdef UTF8_INPUT_ENABLE
2632     } else if (input_f == UTF8_INPUT) {
2633         set_iconv(-TRUE, w_iconv);
2634     } else if (input_f == UTF16_INPUT) {
2635         set_iconv(-TRUE, w_iconv16);
2636     } else if (input_f == UTF32_INPUT) {
2637         set_iconv(-TRUE, w_iconv32);
2638 #endif
2639     } else {
2640         set_iconv(FALSE, e_iconv);
2641     }
2642
2643     {
2644         struct input_code *p = input_code_list;
2645         while (p->name){
2646             status_reinit(p++);
2647         }
2648     }
2649 }
2650
2651 /*
2652  * Check and Ignore BOM
2653  */
2654 void check_bom(FILE *f)
2655 {
2656     int c2;
2657     switch(c2 = (*i_getc)(f)){
2658     case 0x00:
2659         if((c2 = (*i_getc)(f)) == 0x00){
2660             if((c2 = (*i_getc)(f)) == 0xFE){
2661                 if((c2 = (*i_getc)(f)) == 0xFF){
2662                     if(!input_f){
2663                         set_iconv(TRUE, w_iconv32);
2664                     }
2665                     if (iconv == w_iconv32) {
2666                         input_endian = ENDIAN_BIG;
2667                         return;
2668                     }
2669                     (*i_ungetc)(0xFF,f);
2670                 }else (*i_ungetc)(c2,f);
2671                 (*i_ungetc)(0xFE,f);
2672             }else if(c2 == 0xFF){
2673                 if((c2 = (*i_getc)(f)) == 0xFE){
2674                     if(!input_f){
2675                         set_iconv(TRUE, w_iconv32);
2676                     }
2677                     if (iconv == w_iconv32) {
2678                         input_endian = ENDIAN_2143;
2679                         return;
2680                     }
2681                     (*i_ungetc)(0xFF,f);
2682                 }else (*i_ungetc)(c2,f);
2683                 (*i_ungetc)(0xFF,f);
2684             }else (*i_ungetc)(c2,f);
2685             (*i_ungetc)(0x00,f);
2686         }else (*i_ungetc)(c2,f);
2687         (*i_ungetc)(0x00,f);
2688         break;
2689     case 0xEF:
2690         if((c2 = (*i_getc)(f)) == 0xBB){
2691             if((c2 = (*i_getc)(f)) == 0xBF){
2692                 if(!input_f){
2693                     set_iconv(TRUE, w_iconv);
2694                 }
2695                 if (iconv == w_iconv) {
2696                     return;
2697                 }
2698                 (*i_ungetc)(0xBF,f);
2699             }else (*i_ungetc)(c2,f);
2700             (*i_ungetc)(0xBB,f);
2701         }else (*i_ungetc)(c2,f);
2702         (*i_ungetc)(0xEF,f);
2703         break;
2704     case 0xFE:
2705         if((c2 = (*i_getc)(f)) == 0xFF){
2706             if((c2 = (*i_getc)(f)) == 0x00){
2707                 if((c2 = (*i_getc)(f)) == 0x00){
2708                     if(!input_f){
2709                         set_iconv(TRUE, w_iconv32);
2710                     }
2711                     if (iconv == w_iconv32) {
2712                         input_endian = ENDIAN_3412;
2713                         return;
2714                     }
2715                     (*i_ungetc)(0x00,f);
2716                 }else (*i_ungetc)(c2,f);
2717                 (*i_ungetc)(0x00,f);
2718             }else (*i_ungetc)(c2,f);
2719             if(!input_f){
2720                 set_iconv(TRUE, w_iconv16);
2721             }
2722             if (iconv == w_iconv16) {
2723                 input_endian = ENDIAN_BIG;
2724                 return;
2725             }
2726             (*i_ungetc)(0xFF,f);
2727         }else (*i_ungetc)(c2,f);
2728         (*i_ungetc)(0xFE,f);
2729         break;
2730     case 0xFF:
2731         if((c2 = (*i_getc)(f)) == 0xFE){
2732             if((c2 = (*i_getc)(f)) == 0x00){
2733                 if((c2 = (*i_getc)(f)) == 0x00){
2734                     if(!input_f){
2735                         set_iconv(TRUE, w_iconv32);
2736                     }
2737                     if (iconv == w_iconv32) {
2738                         input_endian = ENDIAN_LITTLE;
2739                         return;
2740                     }
2741                     (*i_ungetc)(0x00,f);
2742                 }else (*i_ungetc)(c2,f);
2743                 (*i_ungetc)(0x00,f);
2744             }else (*i_ungetc)(c2,f);
2745             if(!input_f){
2746                 set_iconv(TRUE, w_iconv16);
2747             }
2748             if (iconv == w_iconv16) {
2749                 input_endian = ENDIAN_LITTLE;
2750                 return;
2751             }
2752             (*i_ungetc)(0xFE,f);
2753         }else (*i_ungetc)(c2,f);
2754         (*i_ungetc)(0xFF,f);
2755         break;
2756     default:
2757         (*i_ungetc)(c2,f);
2758         break;
2759     }
2760 }
2761
2762 /*
2763    Conversion main loop. Code detection only.
2764  */
2765
/*
 * kanji_convert: main input loop for one stream.
 *
 * Resets input_mode, output_mode and shift_mode, connects the converter
 * chain (module_connection) and skips a leading BOM (check_bom), then
 * reads f through i_getc until EOF.  Each pass of the loop ends in one of
 * three ways, spelled with the macros defined below:
 *   NEXT  continue: nothing is forwarded on this pass (a state change, or
 *         c1 is kept in c2 as a pending first byte);
 *   SEND  empty statement: control falls out of the if-chain into the
 *         switch(input_mode) at the bottom of the loop, which forwards
 *         (c2, c1[, c0]) to iconv or oconv;
 *   LAST  break: leave the loop and run the epilogue.
 * The epilogue calls iconv with EOF and, if no input codename has been
 * recorded and is_8bit is set, records the name of the input_code_list
 * entry with the lowest score.
 *
 * Returns 1 after the epilogue.  The only other exit is the `return 0`
 * inside the CP932 UDC branch of the send switch.
 */
2766 nkf_char kanji_convert(FILE *f)
2767 {
2768     nkf_char    c3, c2=0, c1, c0=0;
2769     int is_8bit = FALSE;
2770
         /* An explicitly requested 8-bit input encoding disables the
          * SI/SO/ESC handling below (unless MIME decoding is active). */
2771     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2772 #ifdef UTF8_INPUT_ENABLE
2773        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2774 #endif
2775       ){
2776         is_8bit = TRUE;
2777     }
2778
2779     input_mode = ASCII;
2780     output_mode = ASCII;
2781     shift_mode = FALSE;
2782
2783 #define NEXT continue      /* no output, get next */
2784 #define SEND ;             /* output c1 and c2, get next */
2785 #define LAST break         /* end of loop, go closing  */
2786
2787     module_connection();
2788     check_bom(f);
2789
2790     while ((c1 = (*i_getc)(f)) != EOF) {
             /* With INPUT_CODE_FIX, detection statistics are only updated
              * while no input encoding has been forced. */
2791 #ifdef INPUT_CODE_FIX
2792         if (!input_f)
2793 #endif
2794             code_status(c1);
2795         if (c2) {
2796             /* second byte */
                 /* The "8-bit" threshold for the pending first byte is 0x92
                  * instead of DEL for JIS input with ms_ucs_map_f set. */
2797             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2798                 /* in case of 8th bit is on */
2799                 if (!estab_f&&!mime_decode_mode) {
2800                     /* in case of not established yet */
2801                     /* It is still ambiguous */
2802                     if (h_conv(f, c2, c1)==EOF)
2803                         LAST;
2804                     else
2805                         c2 = 0;
2806                     NEXT;
2807                 } else {
2808                     /* in case of already established */
2809                     if (c1 < AT) {
2810                         /* ignore bogus code and not CP5022x UCD */
2811                         c2 = 0;
2812                         NEXT;
2813                     } else {
2814                         SEND;
2815                     }
2816                 }
2817             } else
2818                 /* second byte, 7 bit code */
2819                 /* it might be kanji shifted */
2820                 if ((c1 == DEL) || (c1 <= SP)) {
2821                     /* ignore bogus first code */
2822                     c2 = 0;
2823                     NEXT;
2824                 } else
2825                     SEND;
2826         } else {
2827             /* first byte */
2828 #ifdef UTF8_INPUT_ENABLE
2829             if (iconv == w_iconv16) {
                     /* UTF-16: complete the 16-bit unit as (c2 = high byte,
                      * c1 = low byte).  When the high byte is 0xD8..0xDB a
                      * second 16-bit unit is read and packed into c0.  Any
                      * EOF in the middle sets c2 = EOF before SEND. */
2830                 if (input_endian == ENDIAN_BIG) {
2831                     c2 = c1;
2832                     if ((c1 = (*i_getc)(f)) != EOF) {
2833                         if (0xD8 <= c2 && c2 <= 0xDB) {
2834                             if ((c0 = (*i_getc)(f)) != EOF) {
2835                                 c0 <<= 8;
2836                                 if ((c3 = (*i_getc)(f)) != EOF) {
2837                                     c0 |= c3;
2838                                 } else c2 = EOF;
2839                             } else c2 = EOF;
2840                         }
2841                     } else c2 = EOF;
2842                 } else {
2843                     if ((c2 = (*i_getc)(f)) != EOF) {
2844                         if (0xD8 <= c2 && c2 <= 0xDB) {
2845                             if ((c3 = (*i_getc)(f)) != EOF) {
2846                                 if ((c0 = (*i_getc)(f)) != EOF) {
2847                                     c0 <<= 8;
2848                                     c0 |= c3;
2849                                 } else c2 = EOF;
2850                             } else c2 = EOF;
2851                         }
2852                     } else c2 = EOF;
2853                 }
2854                 SEND;
2855             } else if(iconv == w_iconv32){
                     /* UTF-32: three more bytes are read and the low 24 bits
                      * of the code point are assembled into c1 according to
                      * input_endian.  Note this c3 shadows the function-level
                      * c3 and holds the first byte of the unit. */
2856                 int c3 = c1;
2857                 if((c2 = (*i_getc)(f)) != EOF &&
2858                    (c1 = (*i_getc)(f)) != EOF &&
2859                    (c0 = (*i_getc)(f)) != EOF){
2860                     switch(input_endian){
2861                     case ENDIAN_BIG:
2862                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2863                         break;
2864                     case ENDIAN_LITTLE:
2865                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2866                         break;
2867                     case ENDIAN_2143:
2868                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2869                         break;
2870                     case ENDIAN_3412:
2871                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2872                         break;
2873                     }
2874                     c2 = 0;
2875                 }else{
2876                     c2 = EOF;
2877                 }
2878                 SEND;
2879             } else
2880 #endif
2881 #ifdef NUMCHAR_OPTION
2882             if (is_unicode_capsule(c1)){
2883                 SEND;
2884             } else
2885 #endif
2886             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2887                 /* 8 bit code */
2888                 if (!estab_f && !iso8859_f) {
2889                     /* not established yet */
2890                     c2 = c1;
2891                     NEXT;
2892                 } else { /* estab_f==TRUE */
2893                     if (iso8859_f) {
2894                         c2 = ISO_8859_1;
2895                         c1 &= 0x7f;
2896                         SEND;
2897                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2898                         /* SJIS X0201 Case... */
2899                         if (iso2022jp_f && !x0201_f) {
2900                             (*oconv)(GETA1, GETA2);
2901                             NEXT;
2902                         } else {
2903                             c2 = JIS_X_0201;
2904                             c1 &= 0x7f;
2905                             SEND;
2906                         }
2907                     } else if (c1==SSO && iconv != s_iconv) {
2908                         /* EUC X0201 Case */
2909                         c1 = (*i_getc)(f);  /* skip SSO */
2910                         code_status(c1);
2911                         if (SSP<=c1 && c1<0xe0) {
2912                             if (iso2022jp_f && !x0201_f) {
2913                                 (*oconv)(GETA1, GETA2);
2914                                 NEXT;
2915                             } else {
2916                                 c2 = JIS_X_0201;
2917                                 c1 &= 0x7f;
2918                                 SEND;
2919                             }
2920                         } else  { /* bogus code, skip SSO and one byte */
2921                             NEXT;
2922                         }
2923                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2924                                (c1 == 0xFD || c1 == 0xFE)) {
2925                         /* CP10001 */
2926                         c2 = JIS_X_0201;
2927                         c1 &= 0x7f;
2928                         SEND;
2929                     } else {
2930                        /* already established */
2931                        c2 = c1;
2932                        NEXT;
2933                     }
2934                 }
2935             } else if ((c1 > SP) && (c1 != DEL)) {
2936                 /* in case of Roman characters */
2937                 if (shift_mode) {
2938                     /* output 1 shifted byte */
2939                     if (iso8859_f) {
2940                         c2 = ISO_8859_1;
2941                         SEND;
2942                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2943                       /* output 1 shifted byte */
2944                         if (iso2022jp_f && !x0201_f) {
2945                             (*oconv)(GETA1, GETA2);
2946                             NEXT;
2947                         } else {
2948                             c2 = JIS_X_0201;
2949                             SEND;
2950                         }
2951                     } else {
2952                         /* look like bogus code */
2953                         NEXT;
2954                     }
2955                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2956                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2957                     /* in case of Kanji shifted */
2958                     c2 = c1;
2959                     NEXT;
2960                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2961                     /* Check MIME code */
2962                     if ((c1 = (*i_getc)(f)) == EOF) {
2963                         (*oconv)(0, '=');
2964                         LAST;
2965                     } else if (c1 == '?') {
2966                         /* =? is mime conversion start sequence */
2967                         if(mime_f == STRICT_MIME) {
2968                             /* check in real detail */
2969                             if (mime_begin_strict(f) == EOF)
2970                                 LAST;
2971                             else
2972                                 NEXT;
2973                         } else if (mime_begin(f) == EOF)
2974                             LAST;
2975                         else
2976                             NEXT;
2977                     } else {
2978                         (*oconv)(0, '=');
2979                         (*i_ungetc)(c1,f);
2980                         NEXT;
2981                     }
2982                 } else {
2983                     /* normal ASCII code */
2984                     SEND;
2985                 }
2986             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2987                 shift_mode = FALSE;
2988                 NEXT;
2989             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2990                 shift_mode = TRUE;
2991                 NEXT;
2992             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* Escape sequence handling: ESC $ ..., ESC ( ...,
                      * ESC N / ESC n, or a lone ESC that is passed through. */
2993                 if ((c1 = (*i_getc)(f)) == EOF) {
2994                     /*  (*oconv)(0, ESC); don't send bogus code */
2995                     LAST;
2996                 } else if (c1 == '$') {
2997                     if ((c1 = (*i_getc)(f)) == EOF) {
2998                         /*
2999                         (*oconv)(0, ESC); don't send bogus code
3000                         (*oconv)(0, '$'); */
3001                         LAST;
3002                     } else if (c1 == '@'|| c1 == 'B') {
3003                         /* This is kanji introduction */
3004                         input_mode = JIS_X_0208;
3005                         shift_mode = FALSE;
3006                         set_input_codename("ISO-2022-JP");
3007 #ifdef CHECK_OPTION
3008                         debug("ISO-2022-JP");
3009 #endif
3010                         NEXT;
3011                     } else if (c1 == '(') {
3012                         if ((c1 = (*i_getc)(f)) == EOF) {
3013                             /* don't send bogus code
3014                             (*oconv)(0, ESC);
3015                             (*oconv)(0, '$');
3016                             (*oconv)(0, '(');
3017                                 */
3018                             LAST;
3019                         } else if (c1 == '@'|| c1 == 'B') {
3020                             /* This is kanji introduction */
3021                             input_mode = JIS_X_0208;
3022                             shift_mode = FALSE;
3023                             NEXT;
3024 #ifdef X0212_ENABLE
3025                         } else if (c1 == 'D'){
3026                             input_mode = JIS_X_0212;
3027                             shift_mode = FALSE;
3028                             NEXT;
3029 #endif /* X0212_ENABLE */
3030                         } else if (c1 == (JIS_X_0213_1&0x7F)){
3031                             input_mode = JIS_X_0213_1;
3032                             shift_mode = FALSE;
3033                             NEXT;
3034                         } else if (c1 == (JIS_X_0213_2&0x7F)){
3035                             input_mode = JIS_X_0213_2;
3036                             shift_mode = FALSE;
3037                             NEXT;
3038                         } else {
3039                             /* could be some special code */
3040                             (*oconv)(0, ESC);
3041                             (*oconv)(0, '$');
3042                             (*oconv)(0, '(');
3043                             (*oconv)(0, c1);
3044                             NEXT;
3045                         }
3046                     } else if (broken_f&0x2) {
3047                         /* accept any ESC-(-x as broken code ... */
3048                         input_mode = JIS_X_0208;
3049                         shift_mode = FALSE;
3050                         NEXT;
3051                     } else {
3052                         (*oconv)(0, ESC);
3053                         (*oconv)(0, '$');
3054                         (*oconv)(0, c1);
3055                         NEXT;
3056                     }
3057                 } else if (c1 == '(') {
3058                     if ((c1 = (*i_getc)(f)) == EOF) {
3059                         /* don't send bogus code
3060                         (*oconv)(0, ESC);
3061                         (*oconv)(0, '('); */
3062                         LAST;
3063                     } else {
3064                         if (c1 == 'I') {
3065                             /* This is X0201 kana introduction */
3066                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3067                             NEXT;
3068                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3069                             /* ESC ( B, ESC ( J, ESC ( H: leave kanji mode, back to ASCII */
3070                             input_mode = ASCII; shift_mode = FALSE;
3071                             NEXT;
3072                         } else if (broken_f&0x2) {
3073                             input_mode = ASCII; shift_mode = FALSE;
3074                             NEXT;
3075                         } else {
3076                             (*oconv)(0, ESC);
3077                             (*oconv)(0, '(');
3078                             /* maintain various input_mode here */
3079                             SEND;
3080                         }
3081                     }
3082                } else if ( c1 == 'N' || c1 == 'n'){
3083                    /* SS2 */
3084                    c3 = (*i_getc)(f);  /* skip SS2 */
3085                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3086                        c1 = c3;
3087                        c2 = JIS_X_0201;
3088                        SEND;
3089                    }else{
3090                        (*i_ungetc)(c3, f);
3091                        /* lonely ESC  */
3092                        (*oconv)(0, ESC);
3093                        SEND;
3094                    }
3095                 } else {
3096                     /* lonely ESC  */
3097                     (*oconv)(0, ESC);
3098                     SEND;
3099                 }
3100             } else if (c1 == ESC && iconv == s_iconv) {
3101                 /* ESC in Shift_JIS */
3102                 if ((c1 = (*i_getc)(f)) == EOF) {
3103                     /*  (*oconv)(0, ESC); don't send bogus code */
3104                     LAST;
3105                 } else if (c1 == '$') {
3106                     /* J-PHONE emoji */
3107                     if ((c1 = (*i_getc)(f)) == EOF) {
3108                         /*
3109                            (*oconv)(0, ESC); don't send bogus code
3110                            (*oconv)(0, '$'); */
3111                         LAST;
3112                     } else {
3113                         if (('E' <= c1 && c1 <= 'G') ||
3114                             ('O' <= c1 && c1 <= 'Q')) {
3115                             /*
3116                                NUM : 0 1 2 3 4 5
3117                                BYTE: G E F O P Q
3118                                C%7 : 1 6 0 2 3 4
3119                                C%7 : 0 1 2 3 4 5 6
3120                                NUM : 2 0 3 4 5 X 1
3121                              */
3122                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                                 /* c0 is the per-group offset added to every
                                  * following byte in SP..'z'; each sum is sent
                                  * directly to oconv until another byte (or
                                  * EOF) ends the run. */
3123                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3124                             while ((c1 = (*i_getc)(f)) != EOF) {
3125                                 if (SP <= c1 && c1 <= 'z') {
3126                                     (*oconv)(0, c1 + c0);
3127                                 } else break; /* c1 == SO */
3128                             }
3129                         }
3130                     }
3131                     if (c1 == EOF) LAST;
3132                     NEXT;
3133                 } else {
3134                     /* lonely ESC  */
3135                     (*oconv)(0, ESC);
3136                     SEND;
3137                 }
3138             } else if (c1 == LF || c1 == CR) {
3139                 if (broken_f&4) {
3140                     input_mode = ASCII; set_iconv(FALSE, 0);
3141                     SEND;
3142                 } else if (mime_decode_f && !mime_decode_mode){
                         /* A line break followed by SP is swallowed (the SP
                          * is pushed back and the loop continues); any other
                          * line break is sent on. */
3143                     if (c1 == LF) {
3144                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3145                             i_ungetc(SP,f);
3146                             continue;
3147                         } else {
3148                             i_ungetc(c1,f);
3149                         }
3150                         c1 = LF;
3151                         SEND;
3152                     } else  { /* if (c1 == CR)*/
3153                         if ((c1=(*i_getc)(f))!=EOF) {
3154                             if (c1==SP) {
3155                                 i_ungetc(SP,f);
3156                                 continue;
3157                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3158                                 i_ungetc(SP,f);
3159                                 continue;
3160                             } else {
3161                                 i_ungetc(c1,f);
3162                             }
                                 /* NOTE(review): LF is pushed back here even
                                  * when the byte after CR was neither SP nor
                                  * LF, which appears to insert an LF after a
                                  * bare CR -- confirm this is intended. */
3163                             i_ungetc(LF,f);
3164                         } else {
3165                             i_ungetc(c1,f);
3166                         }
3167                         c1 = CR;
3168                         SEND;
3169                     }
3170                 }
3171             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3172                 /* CP5022x */
3173                 c2 = c1;
3174                 NEXT;
3175             } else
3176                 SEND;
3177         }
3178         /* send: */
             /* Everything that reached SEND arrives here with the character
              * in (c2, c1) and, for some encodings, c0. */
3179         switch(input_mode){
3180         case ASCII:
                 /* A negative iconv result asks for more bytes: -1 for one
                  * extra byte, -2 for two extra bytes packed as c0<<8 | c3;
                  * iconv is then called again with them. */
3181             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3182             case -2:
3183                 /* 4 bytes UTF-8 */
3184                 if ((c0 = (*i_getc)(f)) != EOF) {
3185                     code_status(c0);
3186                     c0 <<= 8;
3187                     if ((c3 = (*i_getc)(f)) != EOF) {
3188                         code_status(c3);
3189                         (*iconv)(c2, c1, c0|c3);
3190                     }
3191                 }
3192                 break;
3193             case -1:
3194                 /* 3 bytes EUC or UTF-8 */
3195                 if ((c0 = (*i_getc)(f)) != EOF) {
3196                     code_status(c0);
3197                     (*iconv)(c2, c1, c0);
3198                 }
3199                 break;
3200             }
3201             break;
3202         case JIS_X_0208:
3203         case JIS_X_0213_1:
3204             if (ms_ucs_map_f &&
3205                 0x7F <= c2 && c2 <= 0x92 &&
3206                 0x21 <= c1 && c1 <= 0x7E) {
3207                 /* CP932 UDC */
                     /* NOTE(review): c1 == 0x7F cannot hold inside the
                      * 0x21..0x7E range tested above, so this return looks
                      * unreachable. */
3208                 if(c1 == 0x7F) return 0;
3209                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3210                 c2 = 0;
3211             }
3212             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3213             break;
3214 #ifdef X0212_ENABLE
3215         case JIS_X_0212:
3216             (*oconv)(PREFIX_EUCG3 | c2, c1);
3217             break;
3218 #endif /* X0212_ENABLE */
3219         case JIS_X_0213_2:
3220             (*oconv)(PREFIX_EUCG3 | c2, c1);
3221             break;
3222         default:
3223             (*oconv)(input_mode, c1);  /* other special case */
3224         }
3225
3226         c2 = 0;
3227         c0 = 0;
3228         continue;
3229         /* goto next_word */
3230     }
3231
3232     /* epilogue */
3233     (*iconv)(EOF, 0, 0);
3234     if (!input_codename)
3235     {
3236         if (is_8bit) {
                 /* Record the name of the candidate with the lowest score. */
3237             struct input_code *p = input_code_list;
3238             struct input_code *result = p;
3239             while (p->name){
3240                 if (p->score < result->score) result = p;
3241                 ++p;
3242             }
3243             set_input_codename(result->name);
3244 #ifdef CHECK_OPTION
3245             debug(result->name);
3246 #endif
3247         }
3248     }
3249     return 1;
3250 }
3251
3252 nkf_char
3253 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3254 {
3255     nkf_char ret, c3, c0;
3256     int hold_index;
3257
3258
3259     /** it must NOT be in the kanji shifte sequence      */
3260     /** it must NOT be written in JIS7                   */
3261     /** and it must be after 2 byte 8bit code            */
3262
3263     hold_count = 0;
3264     push_hold_buf(c2);
3265     push_hold_buf(c1);
3266
3267     while ((c1 = (*i_getc)(f)) != EOF) {
3268         if (c1 == ESC){
3269             (*i_ungetc)(c1,f);
3270             break;
3271         }
3272         code_status(c1);
3273         if (push_hold_buf(c1) == EOF || estab_f){
3274             break;
3275         }
3276     }
3277
3278     if (!estab_f){
3279         struct input_code *p = input_code_list;
3280         struct input_code *result = p;
3281         if (c1 == EOF){
3282             code_status(c1);
3283         }
3284         while (p->name){
3285             if (p->status_func && p->score < result->score){
3286                 result = p;
3287             }
3288             ++p;
3289         }
3290         set_iconv(TRUE, result->iconv_func);
3291     }
3292
3293
3294     /** now,
3295      ** 1) EOF is detected, or
3296      ** 2) Code is established, or
3297      ** 3) Buffer is FULL (but last word is pushed)
3298      **
3299      ** in 1) and 3) cases, we continue to use
3300      ** Kanji codes by oconv and leave estab_f unchanged.
3301      **/
3302
3303     ret = c1;
3304     hold_index = 0;
3305     while (hold_index < hold_count){
3306         c2 = hold_buf[hold_index++];
3307         if (c2 <= DEL
3308 #ifdef NUMCHAR_OPTION
3309             || is_unicode_capsule(c2)
3310 #endif
3311             ){
3312             (*iconv)(0, c2, 0);
3313             continue;
3314         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3315             (*iconv)(JIS_X_0201, c2, 0);
3316             continue;
3317         }
3318         if (hold_index < hold_count){
3319             c1 = hold_buf[hold_index++];
3320         }else{
3321             c1 = (*i_getc)(f);
3322             if (c1 == EOF){
3323                 c3 = EOF;
3324                 break;
3325             }
3326             code_status(c1);
3327         }
3328         c0 = 0;
3329         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3330         case -2:
3331             /* 4 bytes UTF-8 */
3332             if (hold_index < hold_count){
3333                 c0 = hold_buf[hold_index++];
3334             } else if ((c0 = (*i_getc)(f)) == EOF) {
3335                 ret = EOF;
3336                 break;
3337             } else {
3338                 code_status(c0);
3339                 c0 <<= 8;
3340                 if (hold_index < hold_count){
3341                     c3 = hold_buf[hold_index++];
3342                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3343                     c0 = ret = EOF;
3344                     break;
3345                 } else {
3346                     code_status(c3);
3347                     (*iconv)(c2, c1, c0|c3);
3348                 }
3349             }
3350             break;
3351         case -1:
3352             /* 3 bytes EUC or UTF-8 */
3353             if (hold_index < hold_count){
3354                 c0 = hold_buf[hold_index++];
3355             } else if ((c0 = (*i_getc)(f)) == EOF) {
3356                 ret = EOF;
3357                 break;
3358             } else {
3359                 code_status(c0);
3360             }
3361             (*iconv)(c2, c1, c0);
3362             break;
3363         }
3364         if (c0 == EOF) break;
3365     }
3366     return ret;
3367 }
3368
3369 nkf_char push_hold_buf(nkf_char c2)
3370 {
3371     if (hold_count >= HOLD_SIZE*2)
3372         return (EOF);
3373     hold_buf[hold_count++] = (unsigned char)c2;
3374     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3375 }
3376
3377 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3378 {
3379 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3380     nkf_char val;
3381 #endif
3382     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3383 #ifdef SHIFTJIS_CP932
3384     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3385         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3386         if (val){
3387             c2 = val >> 8;
3388             c1 = val & 0xff;
3389         }
3390     }
3391     if (cp932inv_f
3392         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3393         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3394         if (c){
3395             c2 = c >> 8;
3396             c1 = c & 0xff;
3397         }
3398     }
3399 #endif /* SHIFTJIS_CP932 */
3400 #ifdef X0212_ENABLE
3401     if (!x0213_f && is_ibmext_in_sjis(c2)){
3402         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3403         if (val){
3404             if (val > 0x7FFF){
3405                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3406                 c1 = val & 0xff;
3407             }else{
3408                 c2 = val >> 8;
3409                 c1 = val & 0xff;
3410             }
3411             if (p2) *p2 = c2;
3412             if (p1) *p1 = c1;
3413             return 0;
3414         }
3415     }
3416 #endif
3417     if(c2 >= 0x80){
3418         if(x0213_f && c2 >= 0xF0){
3419             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3420                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3421             }else{ /* 78<=k<=94 */
3422                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3423                 if (0x9E < c1) c2++;
3424             }
3425         }else{
3426             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3427             if (0x9E < c1) c2++;
3428         }
3429         if (c1 < 0x9F)
3430             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3431         else {
3432             c1 = c1 - 0x7E;
3433         }
3434     }
3435
3436 #ifdef X0212_ENABLE
3437     c2 = x0212_unshift(c2);
3438 #endif
3439     if (p2) *p2 = c2;
3440     if (p1) *p1 = c1;
3441     return 0;
3442 }
3443
3444 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3445 {
3446     if (c2 == JIS_X_0201) {
3447         c1 &= 0x7f;
3448     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3449         /* NOP */
3450     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3451         /* CP932 UDC */
3452         if(c1 == 0x7F) return 0;
3453         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3454         c2 = 0;
3455     } else {
3456         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3457         if (ret) return ret;
3458     }
3459     (*oconv)(c2, c1);
3460     return 0;
3461 }
3462
3463 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3464 {
3465     if (c2 == JIS_X_0201) {
3466         c1 &= 0x7f;
3467 #ifdef X0212_ENABLE
3468     }else if (c2 == 0x8f){
3469         if (c0 == 0){
3470             return -1;
3471         }
3472         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3473             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3474             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3475             c2 = 0;
3476         } else {
3477             c2 = (c2 << 8) | (c1 & 0x7f);
3478             c1 = c0 & 0x7f;
3479 #ifdef SHIFTJIS_CP932
3480             if (cp51932_f){
3481                 nkf_char s2, s1;
3482                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3483                     s2e_conv(s2, s1, &c2, &c1);
3484                     if (c2 < 0x100){
3485                         c1 &= 0x7f;
3486                         c2 &= 0x7f;
3487                     }
3488                 }
3489             }
3490 #endif /* SHIFTJIS_CP932 */
3491         }
3492 #endif /* X0212_ENABLE */
3493     } else if (c2 == SSO){
3494         c2 = JIS_X_0201;
3495         c1 &= 0x7f;
3496     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3497         /* NOP */
3498     } else {
3499         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3500             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3501             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3502             c2 = 0;
3503         } else {
3504             c1 &= 0x7f;
3505             c2 &= 0x7f;
3506 #ifdef SHIFTJIS_CP932
3507             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3508                 nkf_char s2, s1;
3509                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3510                     s2e_conv(s2, s1, &c2, &c1);
3511                     if (c2 < 0x100){
3512                         c1 &= 0x7f;
3513                         c2 &= 0x7f;
3514                     }
3515                 }
3516             }
3517 #endif /* SHIFTJIS_CP932 */
3518         }
3519     }
3520     (*oconv)(c2, c1);
3521     return 0;
3522 }
3523
3524 #ifdef UTF8_INPUT_ENABLE
3525 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3526 {
3527     nkf_char ret = 0;
3528
3529     if (!c1){
3530         *p2 = 0;
3531         *p1 = c2;
3532     }else if (0xc0 <= c2 && c2 <= 0xef) {
3533         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3534 #ifdef NUMCHAR_OPTION
3535         if (ret > 0){
3536             if (p2) *p2 = 0;
3537             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3538             ret = 0;
3539         }
3540 #endif
3541     }
3542     return ret;
3543 }
3544
3545 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3546 {
3547     nkf_char ret = 0;
3548     static const char w_iconv_utf8_1st_byte[] =
3549     { /* 0xC0 - 0xFF */
3550         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3551         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3552         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3553         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3554
3555     if (c2 < 0 || 0xff < c2) {
3556     }else if (c2 == 0) { /* 0 : 1 byte*/
3557         c0 = 0;
3558     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3559         return 0;
3560     } else{
3561         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3562         case 21:
3563             if (c1 < 0x80 || 0xBF < c1) return 0;
3564             break;
3565         case 30:
3566             if (c0 == 0) return -1;
3567             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3568                 return 0;
3569             break;
3570         case 31:
3571         case 33:
3572             if (c0 == 0) return -1;
3573             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3574                 return 0;
3575             break;
3576         case 32:
3577             if (c0 == 0) return -1;
3578             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3579                 return 0;
3580             break;
3581         case 40:
3582             if (c0 == 0) return -2;
3583             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3584                 return 0;
3585             break;
3586         case 41:
3587             if (c0 == 0) return -2;
3588             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3589                 return 0;
3590             break;
3591         case 42:
3592             if (c0 == 0) return -2;
3593             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3594                 return 0;
3595             break;
3596         default:
3597             return 0;
3598             break;
3599         }
3600     }
3601     if (c2 == 0 || c2 == EOF){
3602     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3603         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3604         c2 = 0;
3605     } else {
3606         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3607     }
3608     if (ret == 0){
3609         (*oconv)(c2, c1);
3610     }
3611     return ret;
3612 }
3613 #endif
3614
3615 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3616 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3617 {
3618     val &= VALUE_MASK;
3619     if (val < 0x80){
3620         *p2 = val;
3621         *p1 = 0;
3622         *p0 = 0;
3623     }else if (val < 0x800){
3624         *p2 = 0xc0 | (val >> 6);
3625         *p1 = 0x80 | (val & 0x3f);
3626         *p0 = 0;
3627     } else if (val <= NKF_INT32_C(0xFFFF)) {
3628         *p2 = 0xe0 | (val >> 12);
3629         *p1 = 0x80 | ((val >> 6) & 0x3f);
3630         *p0 = 0x80 | (val        & 0x3f);
3631     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3632         *p2 = 0xe0 |  (val >> 16);
3633         *p1 = 0x80 | ((val >> 12) & 0x3f);
3634         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3635     } else {
3636         *p2 = 0;
3637         *p1 = 0;
3638         *p0 = 0;
3639     }
3640 }
3641 #endif
3642
3643 #ifdef UTF8_INPUT_ENABLE
3644 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3645 {
3646     nkf_char val;
3647     if (c2 >= 0xf8) {
3648         val = -1;
3649     } else if (c2 >= 0xf0){
3650         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3651         val = (c2 & 0x0f) << 18;
3652         val |= (c1 & 0x3f) << 12;
3653         val |= (c0 & 0x3f00) >> 2;
3654         val |= (c0 & 0x3f);
3655     }else if (c2 >= 0xe0){
3656         val = (c2 & 0x0f) << 12;
3657         val |= (c1 & 0x3f) << 6;
3658         val |= (c0 & 0x3f);
3659     }else if (c2 >= 0xc0){
3660         val = (c2 & 0x1f) << 6;
3661         val |= (c1 & 0x3f);
3662     }else{
3663         val = c2;
3664     }
3665     return val;
3666 }
3667
3668 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3669 {
3670     nkf_char c2, c1, c0;
3671     nkf_char ret = 0;
3672     val &= VALUE_MASK;
3673     if (val < 0x80){
3674         *p2 = 0;
3675         *p1 = val;
3676     }else{
3677         w16w_conv(val, &c2, &c1, &c0);
3678         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3679 #ifdef NUMCHAR_OPTION
3680         if (ret > 0){
3681             *p2 = 0;
3682             *p1 = CLASS_UNICODE | val;
3683             ret = 0;
3684         }
3685 #endif
3686     }
3687     return ret;
3688 }
3689 #endif
3690
3691 #ifdef UTF8_INPUT_ENABLE
3692 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3693 {
3694     nkf_char ret = 0;
3695     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3696         (*oconv)(c2, c1);
3697         return 0;
3698     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3699         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3700             return -2;
3701         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3702         c2 = 0;
3703     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3704         /*
3705            return 2;
3706         */
3707         return 1;
3708     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3709     if (ret) return ret;
3710     (*oconv)(c2, c1);
3711     return 0;
3712 }
3713
3714 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3715 {
3716     int ret = 0;
3717
3718     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3719     } else if (is_unicode_bmp(c1)) {
3720         ret = w16e_conv(c1, &c2, &c1);
3721     } else {
3722         c2 = 0;
3723         c1 =  CLASS_UNICODE | c1;
3724     }
3725     if (ret) return ret;
3726     (*oconv)(c2, c1);
3727     return 0;
3728 }
3729
3730 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3731 {
3732     const unsigned short *const *pp;
3733     const unsigned short *const *const *ppp;
3734     static const char no_best_fit_chars_table_C2[] =
3735     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3736         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3737         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3738         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3739     static const char no_best_fit_chars_table_C2_ms[] =
3740     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3741         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3742         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3743         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3744     static const char no_best_fit_chars_table_932_C2[] =
3745     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3746         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3747         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3748         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3749     static const char no_best_fit_chars_table_932_C3[] =
3750     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3751         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3752         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3753         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3754     nkf_char ret = 0;
3755
3756     if(c2 < 0x80){
3757         *p2 = 0;
3758         *p1 = c2;
3759     }else if(c2 < 0xe0){
3760         if(no_best_fit_chars_f){
3761             if(ms_ucs_map_f == UCS_MAP_CP932){
3762                 switch(c2){
3763                 case 0xC2:
3764                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3765                     break;
3766                 case 0xC3:
3767                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3768                     break;
3769                 }
3770             }else if(!cp932inv_f){
3771                 switch(c2){
3772                 case 0xC2:
3773                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3774                     break;
3775                 case 0xC3:
3776                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3777                     break;
3778                 }
3779             }else if(ms_ucs_map_f == UCS_MAP_MS){
3780                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3781             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3782                 switch(c2){
3783                 case 0xC2:
3784                     switch(c1){
3785                     case 0xA2:
3786                     case 0xA3:
3787                     case 0xA5:
3788                     case 0xA6:
3789                     case 0xAC:
3790                     case 0xAF:
3791                     case 0xB8:
3792                         return 1;
3793                     }
3794                     break;
3795                 }
3796             }
3797         }
3798         pp =
3799             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3800             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3801             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3802             utf8_to_euc_2bytes;
3803         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3804     }else if(c0 < 0xF0){
3805         if(no_best_fit_chars_f){
3806             if(ms_ucs_map_f == UCS_MAP_CP932){
3807                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3808             }else if(ms_ucs_map_f == UCS_MAP_MS){
3809                 switch(c2){
3810                 case 0xE2:
3811                     switch(c1){
3812                     case 0x80:
3813                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3814                         break;
3815                     case 0x88:
3816                         if(c0 == 0x92) return 1;
3817                         break;
3818                     }
3819                     break;
3820                 case 0xE3:
3821                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3822                     break;
3823                 }
3824             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3825                 switch(c2){
3826                 case 0xE3:
3827                     switch(c1){
3828                     case 0x82:
3829                             if(c0 == 0x94) return 1;
3830                         break;
3831                     case 0x83:
3832                             if(c0 == 0xBB) return 1;
3833                         break;
3834                     }
3835                     break;
3836                 }
3837             }else{
3838                 switch(c2){
3839                 case 0xE2:
3840                     switch(c1){
3841                     case 0x80:
3842                         if(c0 == 0x95) return 1;
3843                         break;
3844                     case 0x88:
3845                         if(c0 == 0xA5) return 1;
3846                         break;
3847                     }
3848                     break;
3849                 case 0xEF:
3850                     switch(c1){
3851                     case 0xBC:
3852                         if(c0 == 0x8D) return 1;
3853                         break;
3854                     case 0xBD:
3855                         if(c0 == 0x9E && !cp932inv_f) return 1;
3856                         break;
3857                     case 0xBF:
3858                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3859                         break;
3860                     }
3861                     break;
3862                 }
3863             }
3864         }
3865         ppp =
3866             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3867             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3868             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3869             utf8_to_euc_3bytes;
3870         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3871     }else return -1;
3872 #ifdef SHIFTJIS_CP932
3873     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3874         nkf_char s2, s1;
3875         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3876             s2e_conv(s2, s1, p2, p1);
3877         }else{
3878             ret = 1;
3879         }
3880     }
3881 #endif
3882     return ret;
3883 }
3884
3885 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3886 {
3887     nkf_char c2;
3888     const unsigned short *p;
3889     unsigned short val;
3890
3891     if (pp == 0) return 1;
3892
3893     c1 -= 0x80;
3894     if (c1 < 0 || psize <= c1) return 1;
3895     p = pp[c1];
3896     if (p == 0)  return 1;
3897
3898     c0 -= 0x80;
3899     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3900     val = p[c0];
3901     if (val == 0) return 1;
3902     if (no_cp932ext_f && (
3903         (val>>8) == 0x2D || /* NEC special characters */
3904         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3905         )) return 1;
3906
3907     c2 = val >> 8;
3908    if (val > 0x7FFF){
3909         c2 &= 0x7f;
3910         c2 |= PREFIX_EUCG3;
3911     }
3912     if (c2 == SO) c2 = JIS_X_0201;
3913     c1 = val & 0x7f;
3914     if (p2) *p2 = c2;
3915     if (p1) *p1 = c1;
3916     return 0;
3917 }
3918
3919 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3920 {
3921     int shift = 20;
3922     c &= VALUE_MASK;
3923     while(shift >= 0){
3924         if(c >= 1<<shift){
3925             while(shift >= 0){
3926                 (*f)(0, bin2hex(c>>shift));
3927                 shift -= 4;
3928             }
3929         }else{
3930             shift -= 4;
3931         }
3932     }
3933     return;
3934 }
3935
3936 void encode_fallback_html(nkf_char c)
3937 {
3938     (*oconv)(0, '&');
3939     (*oconv)(0, '#');
3940     c &= VALUE_MASK;
3941     if(c >= NKF_INT32_C(1000000))
3942         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3943     if(c >= NKF_INT32_C(100000))
3944         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3945     if(c >= 10000)
3946         (*oconv)(0, 0x30+(c/10000  )%10);
3947     if(c >= 1000)
3948         (*oconv)(0, 0x30+(c/1000   )%10);
3949     if(c >= 100)
3950         (*oconv)(0, 0x30+(c/100    )%10);
3951     if(c >= 10)
3952         (*oconv)(0, 0x30+(c/10     )%10);
3953     if(c >= 0)
3954         (*oconv)(0, 0x30+ c         %10);
3955     (*oconv)(0, ';');
3956     return;
3957 }
3958
3959 void encode_fallback_xml(nkf_char c)
3960 {
3961     (*oconv)(0, '&');
3962     (*oconv)(0, '#');
3963     (*oconv)(0, 'x');
3964     nkf_each_char_to_hex(oconv, c);
3965     (*oconv)(0, ';');
3966     return;
3967 }
3968
3969 void encode_fallback_java(nkf_char c)
3970 {
3971     (*oconv)(0, '\\');
3972     c &= VALUE_MASK;
3973     if(!is_unicode_bmp(c)){
3974         (*oconv)(0, 'U');
3975         (*oconv)(0, '0');
3976         (*oconv)(0, '0');
3977         (*oconv)(0, bin2hex(c>>20));
3978         (*oconv)(0, bin2hex(c>>16));
3979     }else{
3980         (*oconv)(0, 'u');
3981     }
3982     (*oconv)(0, bin2hex(c>>12));
3983     (*oconv)(0, bin2hex(c>> 8));
3984     (*oconv)(0, bin2hex(c>> 4));
3985     (*oconv)(0, bin2hex(c    ));
3986     return;
3987 }
3988
3989 void encode_fallback_perl(nkf_char c)
3990 {
3991     (*oconv)(0, '\\');
3992     (*oconv)(0, 'x');
3993     (*oconv)(0, '{');
3994     nkf_each_char_to_hex(oconv, c);
3995     (*oconv)(0, '}');
3996     return;
3997 }
3998
3999 void encode_fallback_subchar(nkf_char c)
4000 {
4001     c = unicode_subchar;
4002     (*oconv)((c>>8)&0xFF, c&0xFF);
4003     return;
4004 }
4005 #endif
4006
4007 #ifdef UTF8_OUTPUT_ENABLE
4008 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
4009 {
4010     const unsigned short *p;
4011
4012     if (c2 == JIS_X_0201) {
4013         if (ms_ucs_map_f == UCS_MAP_CP10001) {
4014             switch (c1) {
4015             case 0x20:
4016                 return 0xA0;
4017             case 0x7D:
4018                 return 0xA9;
4019             }
4020         }
4021         p = euc_to_utf8_1byte;
4022 #ifdef X0212_ENABLE
4023     } else if (is_eucg3(c2)){
4024         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4025             return 0xA6;
4026         }
4027         c2 = (c2&0x7f) - 0x21;
4028         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4029             p = x0212_to_utf8_2bytes[c2];
4030         else
4031             return 0;
4032 #endif
4033     } else {
4034         c2 &= 0x7f;
4035         c2 = (c2&0x7f) - 0x21;
4036         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4037             p =
4038                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4039                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4040                 euc_to_utf8_2bytes_ms[c2];
4041         else
4042             return 0;
4043     }
4044     if (!p) return 0;
4045     c1 = (c1 & 0x7f) - 0x21;
4046     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4047         return p[c1];
4048     return 0;
4049 }
4050
4051 void w_oconv(nkf_char c2, nkf_char c1)
4052 {
4053     nkf_char c0;
4054     nkf_char val;
4055
4056     if (output_bom_f) {
4057         output_bom_f = FALSE;
4058         (*o_putc)('\357');
4059         (*o_putc)('\273');
4060         (*o_putc)('\277');
4061     }
4062
4063     if (c2 == EOF) {
4064         (*o_putc)(EOF);
4065         return;
4066     }
4067
4068 #ifdef NUMCHAR_OPTION
4069     if (c2 == 0 && is_unicode_capsule(c1)){
4070         val = c1 & VALUE_MASK;
4071         if (val < 0x80){
4072             (*o_putc)(val);
4073         }else if (val < 0x800){
4074             (*o_putc)(0xC0 | (val >> 6));
4075             (*o_putc)(0x80 | (val & 0x3f));
4076         } else if (val <= NKF_INT32_C(0xFFFF)) {
4077             (*o_putc)(0xE0 | (val >> 12));
4078             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4079             (*o_putc)(0x80 | (val        & 0x3f));
4080         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4081             (*o_putc)(0xF0 | ( val>>18));
4082             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4083             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4084             (*o_putc)(0x80 | ( val      & 0x3f));
4085         }
4086         return;
4087     }
4088 #endif
4089
4090     if (c2 == 0) {
4091         output_mode = ASCII;
4092         (*o_putc)(c1);
4093     } else if (c2 == ISO_8859_1) {
4094         output_mode = UTF_8;
4095         (*o_putc)(c1 | 0x080);
4096     } else {
4097         output_mode = UTF_8;
4098         val = e2w_conv(c2, c1);
4099         if (val){
4100             w16w_conv(val, &c2, &c1, &c0);
4101             (*o_putc)(c2);
4102             if (c1){
4103                 (*o_putc)(c1);
4104                 if (c0) (*o_putc)(c0);
4105             }
4106         }
4107     }
4108 }
4109
4110 void w_oconv16(nkf_char c2, nkf_char c1)
4111 {
4112     if (output_bom_f) {
4113         output_bom_f = FALSE;
4114         if (output_endian == ENDIAN_LITTLE){
4115             (*o_putc)((unsigned char)'\377');
4116             (*o_putc)('\376');
4117         }else{
4118             (*o_putc)('\376');
4119             (*o_putc)((unsigned char)'\377');
4120         }
4121     }
4122
4123     if (c2 == EOF) {
4124         (*o_putc)(EOF);
4125         return;
4126     }
4127
4128     if (c2 == ISO_8859_1) {
4129         c2 = 0;
4130         c1 |= 0x80;
4131 #ifdef NUMCHAR_OPTION
4132     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4133         if (is_unicode_bmp(c1)) {
4134             c2 = (c1 >> 8) & 0xff;
4135             c1 &= 0xff;
4136         } else {
4137             c1 &= VALUE_MASK;
4138             if (c1 <= UNICODE_MAX) {
4139                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4140                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4141                 if (output_endian == ENDIAN_LITTLE){
4142                     (*o_putc)(c2 & 0xff);
4143                     (*o_putc)((c2 >> 8) & 0xff);
4144                     (*o_putc)(c1 & 0xff);
4145                     (*o_putc)((c1 >> 8) & 0xff);
4146                 }else{
4147                     (*o_putc)((c2 >> 8) & 0xff);
4148                     (*o_putc)(c2 & 0xff);
4149                     (*o_putc)((c1 >> 8) & 0xff);
4150                     (*o_putc)(c1 & 0xff);
4151                 }
4152             }
4153             return;
4154         }
4155 #endif
4156     } else if (c2) {
4157         nkf_char val = e2w_conv(c2, c1);
4158         c2 = (val >> 8) & 0xff;
4159         c1 = val & 0xff;
4160         if (!val) return;
4161     }
4162     if (output_endian == ENDIAN_LITTLE){
4163         (*o_putc)(c1);
4164         (*o_putc)(c2);
4165     }else{
4166         (*o_putc)(c2);
4167         (*o_putc)(c1);
4168     }
4169 }
4170
4171 void w_oconv32(nkf_char c2, nkf_char c1)
4172 {
4173     if (output_bom_f) {
4174         output_bom_f = FALSE;
4175         if (output_endian == ENDIAN_LITTLE){
4176             (*o_putc)((unsigned char)'\377');
4177             (*o_putc)('\376');
4178             (*o_putc)('\000');
4179             (*o_putc)('\000');
4180         }else{
4181             (*o_putc)('\000');
4182             (*o_putc)('\000');
4183             (*o_putc)('\376');
4184             (*o_putc)((unsigned char)'\377');
4185         }
4186     }
4187
4188     if (c2 == EOF) {
4189         (*o_putc)(EOF);
4190         return;
4191     }
4192
4193     if (c2 == ISO_8859_1) {
4194         c1 |= 0x80;
4195 #ifdef NUMCHAR_OPTION
4196     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4197         c1 &= VALUE_MASK;
4198 #endif
4199     } else if (c2) {
4200         c1 = e2w_conv(c2, c1);
4201         if (!c1) return;
4202     }
4203     if (output_endian == ENDIAN_LITTLE){
4204         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4205         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4206         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4207         (*o_putc)('\000');
4208     }else{
4209         (*o_putc)('\000');
4210         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4211         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4212         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4213     }
4214 }
4215 #endif
4216
4217 void e_oconv(nkf_char c2, nkf_char c1)
4218 {
4219 #ifdef NUMCHAR_OPTION
4220     if (c2 == 0 && is_unicode_capsule(c1)){
4221