OSDN Git Service

* support encoding "SJIS".
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.156 2007/12/19 08:57:58 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-19"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #if defined(DEFAULT_CODE_JIS)
44 #elif defined(DEFAULT_CODE_SJIS)
45 #elif defined(DEFAULT_CODE_EUC)
46 #elif defined(DEFAULT_CODE_UTF8)
47 #else
48 #define DEFAULT_CODE_JIS 1
49 #endif
50
51 #ifndef MIME_DECODE_DEFAULT
52 #define MIME_DECODE_DEFAULT STRICT_MIME
53 #endif
54 #ifndef X0201_DEFAULT
55 #define X0201_DEFAULT TRUE
56 #endif
57
58 #if DEFAULT_NEWLINE == 0x0D0A
59 #define PUT_NEWLINE(func) do {\
60     func(0x0D);\
61     func(0x0A);\
62 } while (0)
63 #define OCONV_NEWLINE(func) do {\
64     func(0, 0x0D);\
65     func(0, 0x0A);\
66 } while (0)
67 #elif DEFAULT_NEWLINE == 0x0D
68 #define PUT_NEWLINE(func) func(0x0D)
69 #define OCONV_NEWLINE(func) func(0, 0x0D)
70 #else
71 #define DEFAULT_NEWLINE 0x0A
72 #define PUT_NEWLINE(func) func(0x0A)
73 #define OCONV_NEWLINE(func) func(0, 0x0A)
74 #endif
75
76 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
77 #define MSDOS
78 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
79 #define __WIN32__
80 #endif
81 #endif
82
83 #ifdef PERL_XS
84 #undef OVERWRITE
85 #endif
86
87 #ifndef PERL_XS
88 #include <stdio.h>
89 #endif
90
91 #include <stdlib.h>
92 #include <string.h>
93
94 #if defined(MSDOS) || defined(__OS2__)
95 #include <fcntl.h>
96 #include <io.h>
97 #if defined(_MSC_VER) || defined(__WATCOMC__)
98 #define mktemp _mktemp
99 #endif
100 #endif
101
102 #ifdef MSDOS
103 #ifdef LSI_C
104 #define setbinmode(fp) fsetbin(fp)
105 #elif defined(__DJGPP__)
106 #include <libc/dosio.h>
107 #define setbinmode(fp) djgpp_setbinmode(fp)
108 #else /* Microsoft C, Turbo C */
109 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
110 #endif
111 #else /* UNIX */
112 #define setbinmode(fp)
113 #endif
114
115 #if defined(__DJGPP__)
116 void  djgpp_setbinmode(FILE *fp)
117 {
118     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
119     int fd, m;
120     fd = fileno(fp);
121     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
122     __file_handle_set(fd, m);
123 }
124 #endif
125
126 #ifdef _IOFBF /* SysV and MSDOS, Windows */
127 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
128 #else /* BSD */
129 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
130 #endif
131
132 /*Borland C++ 4.5 EasyWin*/
133 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
134 #define         EASYWIN
135 #ifndef __WIN16__
136 #define __WIN16__
137 #endif
138 #include <windows.h>
139 #endif
140
141 #ifdef OVERWRITE
142 /* added by satoru@isoternet.org */
143 #if defined(__EMX__)
144 #include <sys/types.h>
145 #endif
146 #include <sys/stat.h>
147 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
148 #include <unistd.h>
149 #if defined(__WATCOMC__)
150 #include <sys/utime.h>
151 #else
152 #include <utime.h>
153 #endif
154 #else /* defined(MSDOS) */
155 #ifdef __WIN32__
156 #ifdef __BORLANDC__ /* BCC32 */
157 #include <utime.h>
158 #else /* !defined(__BORLANDC__) */
159 #include <sys/utime.h>
160 #endif /* (__BORLANDC__) */
161 #else /* !defined(__WIN32__) */
162 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
163 #include <sys/utime.h>
164 #elif defined(__TURBOC__) /* BCC */
165 #include <utime.h>
166 #elif defined(LSI_C) /* LSI C */
167 #endif /* (__WIN32__) */
168 #endif
169 #endif
170 #endif
171
172 #define         FALSE   0
173 #define         TRUE    1
174
175 /* state of output_mode and input_mode
176
177    c2           0 means ASCII
178                 JIS_X_0201
179                 ISO_8859_1
180                 JIS_X_0208
181                 EOF      all termination
182    c1           32bit data
183
184  */
185
186 /* Input Assumption */
187
188 #define         JIS_INPUT       4
189 #define         EUC_INPUT      16
190 #define         SJIS_INPUT      5
191 #define         LATIN1_INPUT    6
192 #define         UTF8_INPUT     13
193 #define         UTF16_INPUT    1015
194 #define         UTF32_INPUT    1017
195
196 #define         FIXED_MIME      7
197 #define         STRICT_MIME     8
198
199 /* MIME ENCODE */
200
201
202 /* byte order */
203
204 #define         ENDIAN_BIG      1234
205 #define         ENDIAN_LITTLE   4321
206 #define         ENDIAN_2143     2143
207 #define         ENDIAN_3412     3412
208
209 /* ASCII CODE */
210
211 #define         BS      0x08
212 #define         TAB     0x09
213 #define         LF      0x0a
214 #define         CR      0x0d
215 #define         ESC     0x1b
216 #define         SP      0x20
217 #define         AT      0x40
218 #define         SSP     0xa0
219 #define         DEL     0x7f
220 #define         SI      0x0f
221 #define         SO      0x0e
222 #define         SSO     0x8e
223 #define         SS3     0x8f
224 #define         CRLF    0x0D0A
225
226
227 /* encodings
 *
 * Identifiers for the character sets / encodings named in this file.
 * ASCII through UTF_32LE_BOM take consecutive values starting at 0.
 * JIS_X_0212, JIS_X_0213_1 and JIS_X_0213_2 are pinned to explicit
 * values, and BINARY, which follows them, therefore gets 0x2851.
 */
228
229 enum nkf_encodings {
230     ASCII,
231     JIS_X_0208,
232     JIS_X_0201,
233     ISO_8859_1,
234     ISO_2022_JP,
235     CP50220,
236     CP50221,
237     CP50222,
238     ISO_2022_JP_1,
239     ISO_2022_JP_3,
240     SHIFT_JIS,
241     WINDOWS_31J,
242     CP10001,
243     EUC_JP,
244     CP51932,
245     EUCJP_MS,
246     EUCJP_ASCII,
247     SHIFT_JISX0213,
248     SHIFT_JIS_2004,
249     EUC_JISX0213,
250     EUC_JIS_2004,
251     UTF_8,
252     UTF_8N,
253     UTF_8_BOM,
254     UTF8_MAC,
255     UTF_16,
256     UTF_16BE,
257     UTF_16BE_BOM,
258     UTF_16LE,
259     UTF_16LE_BOM,
260     UTF_32,
261     UTF_32BE,
262     UTF_32BE_BOM,
263     UTF_32LE,
264     UTF_32LE_BOM,
/* explicit values from here on; NOTE(review): they look like packed
 * escape-sequence bytes (0x28 = '(') -- confirm where they are consumed */
265     JIS_X_0212=0x2844,
266     JIS_X_0213_1=0x284F,
267     JIS_X_0213_2=0x2850,
268     BINARY
269 };
/*
 * Encoding id -> display name.
 * One row per id; the list ends with the sentinel row {-1, ""}.
 * The charset-only ids (JIS_X_0208, JIS_X_0201, JIS_X_0212, JIS_X_0213_1,
 * JIS_X_0213_2) have no row here.
 * NOTE(review): presumably scanned linearly up to the sentinel -- the
 * lookup code is outside this part of the file.
 */
270 static const struct {
271     const int id;
272     const char *name;
273 } encoding_id_to_name_table[] = {
274     {ASCII,             "ASCII"},
275     {ISO_8859_1,        "ISO-8859-1"},
276     {ISO_2022_JP,       "ISO-2022-JP"},
277     {CP50220,           "CP50220"},
278     {CP50221,           "CP50221"},
279     {CP50222,           "CP50222"},
280     {ISO_2022_JP_1,     "ISO-2022-JP-1"},
281     {ISO_2022_JP_3,     "ISO-2022-JP-3"},
282     {SHIFT_JIS,         "Shift_JIS"},
283     {WINDOWS_31J,       "WINDOWS-31J"},
284     {CP10001,           "CP10001"},
285     {EUC_JP,            "EUC-JP"},
286     {CP51932,           "CP51932"},
287     {EUCJP_MS,          "eucJP-MS"},
288     {EUCJP_ASCII,       "eucJP-ASCII"},
289     {SHIFT_JISX0213,    "Shift_JISX0213"},
290     {SHIFT_JIS_2004,    "Shift_JIS-2004"},
291     {EUC_JISX0213,      "EUC-JISX0213"},
292     {EUC_JIS_2004,      "EUC-JIS-2004"},
293     {UTF_8,             "UTF-8"},
294     {UTF_8N,            "UTF-8N"},
295     {UTF_8_BOM,         "UTF-8-BOM"},
296     {UTF8_MAC,          "UTF8-MAC"},
297     {UTF_16,            "UTF-16"},
298     {UTF_16BE,          "UTF-16BE"},
299     {UTF_16BE_BOM,      "UTF-16BE-BOM"},
300     {UTF_16LE,          "UTF-16LE"},
301     {UTF_16LE_BOM,      "UTF-16LE-BOM"},
302     {UTF_32,            "UTF-32"},
303     {UTF_32BE,          "UTF-32BE"},
304     {UTF_32BE_BOM,      "UTF-32BE-BOM"},
305     {UTF_32LE,          "UTF-32LE"},
306     {UTF_32LE_BOM,      "UTF-32LE-BOM"},
307     {BINARY,            "BINARY"},
308     {-1,                        ""}   /* sentinel: end of table */
309 };
/*
 * Encoding name / alias -> encoding id.
 * Several spellings may map to the same id (e.g. "SHIFT_JIS" and "SJIS",
 * or "CP932" and "MS932").  Every name is written in upper case.
 * The list ends with the sentinel row {"", -1}.
 * NOTE(review): presumably the caller upper-cases user input (see
 * nkf_str_upcase) before comparing -- the lookup code is not in view.
 */
310 static const struct {
311     const char *name;
312     const int id;
313 } encoding_name_to_id_table[] = {
314     {"ASCII",                   ASCII},
315     {"ISO-2022-JP",             ISO_2022_JP},
316     {"X-ISO2022JP-CP932",       CP50220},
317     {"CP50220",                 CP50220},
318     {"CP50221",                 CP50221},
319     {"CP50222",                 CP50222},
320     {"ISO-2022-JP-1",           ISO_2022_JP_1},
321     {"ISO-2022-JP-3",           ISO_2022_JP_3},
322     {"SHIFT_JIS",               SHIFT_JIS},
323     {"SJIS",                    SHIFT_JIS},
324     {"WINDOWS-31J",             WINDOWS_31J},
325     {"CSWINDOWS31J",            WINDOWS_31J},
326     {"CP932",                   WINDOWS_31J},
327     {"MS932",                   WINDOWS_31J},
328     {"CP10001",                 CP10001},
329     {"EUCJP",                   EUC_JP},
330     {"EUC-JP",                  EUC_JP},
331     {"CP51932",                 CP51932},
332     {"EUC-JP-MS",               EUCJP_MS},
333     {"EUCJP-MS",                EUCJP_MS},
334     {"EUCJPMS",                 EUCJP_MS},
335     {"EUC-JP-ASCII",            EUCJP_ASCII},
336     {"EUCJP-ASCII",             EUCJP_ASCII},
337     {"SHIFT_JISX0213",          SHIFT_JISX0213},
338     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
339     {"EUC-JISX0213",            EUC_JISX0213},
340     {"EUC-JIS-2004",            EUC_JIS_2004},
341     {"UTF-8",                   UTF_8},
342     {"UTF-8N",                  UTF_8N},
343     {"UTF-8-BOM",               UTF_8_BOM},
344     {"UTF8-MAC",                UTF8_MAC},
345     {"UTF-8-MAC",               UTF8_MAC},
346     {"UTF-16",                  UTF_16},
347     {"UTF-16BE",                UTF_16BE},
348     {"UTF-16BE-BOM",            UTF_16BE_BOM},
349     {"UTF-16LE",                UTF_16LE},
350     {"UTF-16LE-BOM",            UTF_16LE_BOM},
351     {"UTF-32",                  UTF_32},
352     {"UTF-32BE",                UTF_32BE},
353     {"UTF-32BE-BOM",            UTF_32BE_BOM},
354     {"UTF-32LE",                UTF_32LE},
355     {"UTF-32LE-BOM",            UTF_32LE_BOM},
356     {"BINARY",                  BINARY},
357     {"",                        -1}   /* sentinel: end of table */
358 };
359 #if defined(DEFAULT_CODE_JIS)
360 #define     DEFAULT_ENCODING ISO_2022_JP
361 #elif defined(DEFAULT_CODE_SJIS)
362 #define     DEFAULT_ENCODING SHIFT_JIS
363 #elif defined(DEFAULT_CODE_EUC)
364 #define     DEFAULT_ENCODING EUC_JP
365 #elif defined(DEFAULT_CODE_UTF8)
366 #define     DEFAULT_ENCODING UTF_8
367 #endif
368
369
/*
 * ASCII-only character classification and hex helpers.
 *
 * Every macro parameter is wrapped in parentheses so that an expression
 * argument (e.g. `a | b` or `x ? p : q`) is treated as a single operand.
 * The argument is still evaluated more than once, so do not pass
 * expressions with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* value of a hexadecimal digit character; 0 for anything that is not one */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* upper-case hexadecimal digit for the low 4 bits of c */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* true when the second-lowest byte of c2 (as an unsigned short) equals SS3 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* CR, LF, or a character strictly between SP and DEL other than ? = _ ( ) . " */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
392
393 #define CP932_TABLE_BEGIN 0xFA
394 #define CP932_TABLE_END   0xFC
395 #define CP932INV_TABLE_BEGIN 0xED
396 #define CP932INV_TABLE_END   0xEE
397 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
398
399 #define         HOLD_SIZE       1024
400 #if defined(INT_IS_SHORT)
401 #define         IOBUF_SIZE      2048
402 #else
403 #define         IOBUF_SIZE      16384
404 #endif
405
406 #define         DEFAULT_J       'B'
407 #define         DEFAULT_R       'B'
408
409 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
410 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
411
412 #define         RANGE_NUM_MAX   18
413 #define         GETA1   0x22
414 #define         GETA2   0x2e
415
416
417 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
418 #define sizeof_euc_to_utf8_1byte 94
419 #define sizeof_euc_to_utf8_2bytes 94
420 #define sizeof_utf8_to_euc_C2 64
421 #define sizeof_utf8_to_euc_E5B8 64
422 #define sizeof_utf8_to_euc_2bytes 112
423 #define sizeof_utf8_to_euc_3bytes 16
424 #endif
425
426 /* MIME preprocessor */
427
428 #ifdef EASYWIN /*Easy Win */
429 extern POINT _BufferSize;
430 #endif
431
/*
 * Record describing one candidate input encoding; the instances live in
 * input_code_list.
 * NOTE(review): the exact meaning of stat/score/index/buf/_file_stat is
 * defined by the status_* helpers, which are not in this part of the file.
 */
432 struct input_code{
433     char *name;         /* encoding label, e.g. "EUC-JP" */
434     nkf_char stat;
435     nkf_char score;
436     nkf_char index;
437     nkf_char buf[3];
438     void (*status_func)(struct input_code *, nkf_char);    /* may be NULL (see the UTF-16/UTF-32 rows of input_code_list) */
439     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter for this encoding */
440     int _file_stat;
441 };
442
443 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
444 static int output_encoding = DEFAULT_ENCODING;
445
446 #if !defined(PERL_XS) && !defined(WIN32DLL)
447 static  nkf_char     noconvert(FILE *f);
448 #endif
449 static  void    module_connection(void);
450 static  nkf_char     kanji_convert(FILE *f);
451 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
452 static  nkf_char     push_hold_buf(nkf_char c2);
453 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
454 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
455 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
456 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
457 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
458 /* UCS Mapping
459  * 0: Shift_JIS, eucJP-ascii
460  * 1: eucJP-ms
461  * 2: CP932, CP51932
462  * 3: CP10001
463  */
464 #define UCS_MAP_ASCII   0
465 #define UCS_MAP_MS      1
466 #define UCS_MAP_CP932   2
467 #define UCS_MAP_CP10001 3
468 static int ms_ucs_map_f = UCS_MAP_ASCII;
469 #endif
470 #ifdef UTF8_INPUT_ENABLE
471 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
472 static  int     no_cp932ext_f = FALSE;
473 /* ignore ZERO WIDTH NO-BREAK SPACE */
474 static  int     no_best_fit_chars_f = FALSE;
475 static  int     input_endian = ENDIAN_BIG;
476 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
477 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
478 static  void    encode_fallback_html(nkf_char c);
479 static  void    encode_fallback_xml(nkf_char c);
480 static  void    encode_fallback_java(nkf_char c);
481 static  void    encode_fallback_perl(nkf_char c);
482 static  void    encode_fallback_subchar(nkf_char c);
483 static  void    (*encode_fallback)(nkf_char c) = NULL;
484 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
485 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
486 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
487 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
488 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
489 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
490 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
491 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
492 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
493 static  void    w_status(struct input_code *, nkf_char);
494 #endif
495 #ifdef UTF8_OUTPUT_ENABLE
496 static  int     output_bom_f = FALSE;
497 static  int     output_endian = ENDIAN_BIG;
498 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
499 static  void    w_oconv(nkf_char c2,nkf_char c1);
500 static  void    w_oconv16(nkf_char c2,nkf_char c1);
501 static  void    w_oconv32(nkf_char c2,nkf_char c1);
502 #endif
503 static  void    e_oconv(nkf_char c2,nkf_char c1);
504 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
505 static  void    s_oconv(nkf_char c2,nkf_char c1);
506 static  void    j_oconv(nkf_char c2,nkf_char c1);
507 static  void    fold_conv(nkf_char c2,nkf_char c1);
508 static  void    nl_conv(nkf_char c2,nkf_char c1);
509 static  void    z_conv(nkf_char c2,nkf_char c1);
510 static  void    rot_conv(nkf_char c2,nkf_char c1);
511 static  void    hira_conv(nkf_char c2,nkf_char c1);
512 static  void    base64_conv(nkf_char c2,nkf_char c1);
513 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
514 static  void    no_connection(nkf_char c2,nkf_char c1);
515 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
516
517 static  void    code_score(struct input_code *ptr);
518 static  void    code_status(nkf_char c);
519
520 static  void    std_putc(nkf_char c);
521 static  nkf_char     std_getc(FILE *f);
522 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
523
524 static  nkf_char     broken_getc(FILE *f);
525 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
526
527 static  nkf_char     mime_begin(FILE *f);
528 static  nkf_char     mime_getc(FILE *f);
529 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
530
531 static  void    switch_mime_getc(void);
532 static  void    unswitch_mime_getc(void);
533 static  nkf_char     mime_begin_strict(FILE *f);
534 static  nkf_char     mime_getc_buf(FILE *f);
535 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
536 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
537
538 static  nkf_char     base64decode(nkf_char c);
539 static  void    mime_prechar(nkf_char c2, nkf_char c1);
540 static  void    mime_putc(nkf_char c);
541 static  void    open_mime(nkf_char c);
542 static  void    close_mime(void);
543 static  void    eof_mime(void);
544 static  void    mimeout_addchar(nkf_char c);
545 #ifndef PERL_XS
546 static  void    usage(void);
547 static  void    version(void);
548 static  void    show_configuration(void);
549 #endif
550 static  void    options(unsigned char *c);
551 static  void    reinit(void);
552
553 /* buffers */
554
555 #if !defined(PERL_XS) && !defined(WIN32DLL)
556 static unsigned char   stdibuf[IOBUF_SIZE];
557 static unsigned char   stdobuf[IOBUF_SIZE];
558 #endif
559 static unsigned char   hold_buf[HOLD_SIZE*2];
560 static int             hold_count = 0;
561
562 /* MIME preprocessor fifo */
563
564 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
565 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
566 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
567 static unsigned char           mime_buf[MIME_BUF_SIZE];
568 static unsigned int            mime_top = 0;
569 static unsigned int            mime_last = 0;  /* decoded */
570 static unsigned int            mime_input = 0; /* undecoded */
571 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
572
573 /* flags: file-scope option switches (meanings taken from the original notes) */
574 static int             unbuf_f = FALSE;
575 static int             estab_f = FALSE;
576 static int             nop_f = FALSE;
577 static int             binmode_f = TRUE;       /* binary mode */
578 static int             rot_f = FALSE;          /* rot14/43 mode (NOTE(review): possibly meant ROT13/47 -- confirm) */
579 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
580 static int             input_f = FALSE;        /* non fixed input code  */
581 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
582 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
583 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
584 static int             mimebuf_f = FALSE;      /* MIME buffered input */
585 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
586 static int             iso8859_f = FALSE;      /* ISO8859 through */
587 static int             mimeout_f = FALSE;       /* base64 mode */
588 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
589 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
590
591 #ifdef UNICODE_NORMALIZATION
592 static int nfc_f = FALSE;
593 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
594 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
595 static nkf_char nfc_getc(FILE *f);
596 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
597 #endif
598
599 #ifdef INPUT_OPTION
600 static int cap_f = FALSE;
601 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
602 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
603 static nkf_char cap_getc(FILE *f);
604 static nkf_char cap_ungetc(nkf_char c,FILE *f);
605
606 static int url_f = FALSE;
607 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
608 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
609 static nkf_char url_getc(FILE *f);
610 static nkf_char url_ungetc(nkf_char c,FILE *f);
611 #endif
612
613 #if defined(INT_IS_SHORT)
614 #define NKF_INT32_C(n)   (n##L)
615 #else
616 #define NKF_INT32_C(n)   (n)
617 #endif
618 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
619 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
620 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
621 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
622 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
623 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
624 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
625
626 #ifdef NUMCHAR_OPTION
627 static int numchar_f = FALSE;
628 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
629 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
630 static nkf_char numchar_getc(FILE *f);
631 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
632 #endif
633
634 #ifdef CHECK_OPTION
635 static int noout_f = FALSE;
636 static void no_putc(nkf_char c);
637 static int debug_f = FALSE;
638 static void debug(const char *str);
639 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
640 #endif
641
642 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
643 #if !defined PERL_XS
644 static  void    print_guessed_code(char *filename);
645 #endif
646 static  void    set_input_codename(char *codename);
647
648 #ifdef EXEC_IO
649 static int exec_f = 0;
650 #endif
651
652 #ifdef SHIFTJIS_CP932
653 /* invert IBM extended characters to others */
654 static int cp51932_f = FALSE;
655
656 /* invert NEC-selected IBM extended characters to IBM extended characters */
657 static int cp932inv_f = TRUE;
658
659 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
660 #endif /* SHIFTJIS_CP932 */
661
662 #ifdef X0212_ENABLE
663 static int x0212_f = FALSE;
664 static nkf_char x0212_shift(nkf_char c);
665 static nkf_char x0212_unshift(nkf_char c);
666 #endif
667 static int x0213_f = FALSE;
668
669 static unsigned char prefix_table[256];
670
671 static void set_code_score(struct input_code *ptr, nkf_char score);
672 static void clr_code_score(struct input_code *ptr, nkf_char score);
673 static void status_disable(struct input_code *ptr);
674 static void status_push_ch(struct input_code *ptr, nkf_char c);
675 static void status_clear(struct input_code *ptr);
676 static void status_reset(struct input_code *ptr);
677 static void status_reinit(struct input_code *ptr);
678 static void status_check(struct input_code *ptr, nkf_char c);
679 static void e_status(struct input_code *, nkf_char);
680 static void s_status(struct input_code *, nkf_char);
681
/*
 * Table of candidate input encodings.  Each row gives a name, zeroed
 * state fields, a status function and an input converter.  The UTF rows
 * exist only when UTF8_INPUT_ENABLE is defined; UTF-16 and UTF-32 have
 * no status function (NULL).  The all-zero row terminates the list.
 */
682 struct input_code input_code_list[] = {
683     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
684     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
685 #ifdef UTF8_INPUT_ENABLE
686     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
687     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
688     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
689 #endif
690     {0}   /* terminator: name == NULL */
691 };
692
693 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
694 static int              base64_count = 0;
695
696 /* X0208 -> ASCII converter */
697
698 /* fold parameter */
699 static int             f_line = 0;    /* chars in line */
700 static int             f_prev = 0;
701 static int             fold_preserve_f = FALSE; /* preserve new lines */
702 static int             fold_f  = FALSE;
703 static int             fold_len  = 0;
704
705 /* options */
706 static unsigned char   kanji_intro = DEFAULT_J;
707 static unsigned char   ascii_intro = DEFAULT_R;
708
709 /* Folding */
710
711 #define FOLD_MARGIN  10
712 #define DEFAULT_FOLD 60
713
714 static int             fold_margin  = FOLD_MARGIN;
715
716 /* converters */
717
718 #ifdef DEFAULT_CODE_JIS
719 #   define  DEFAULT_CONV j_oconv
720 #endif
721 #ifdef DEFAULT_CODE_SJIS
722 #   define  DEFAULT_CONV s_oconv
723 #endif
724 #ifdef DEFAULT_CODE_EUC
725 #   define  DEFAULT_CONV e_oconv
726 #endif
727 #ifdef DEFAULT_CODE_UTF8
728 #   define  DEFAULT_CONV w_oconv
729 #endif
730
731 /* process default */
732 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
733
734 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
735 /* s_iconv or oconv */
736 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
737
738 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
739 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
740 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
741 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
742 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
743 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
744 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
745
746 /* static redirections */
747
748 static  void   (*o_putc)(nkf_char c) = std_putc;
749
750 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
751 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
752
753 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
754 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
755
756 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
757
758 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
759 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
760
761 /* for strict mime */
762 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
763 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
764
765 /* Global states */
766 static int output_mode = ASCII,    /* output kanji mode */
767            input_mode =  ASCII,    /* input kanji mode */
768            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
769 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
770
771 /* X0201 / X0208 conversion tables */
772
773 /* X0201 kana conversion table */
774 /* 90-9F A0-DF */
/*
 * Entries are stored as consecutive byte pairs; the array ends with a
 * 0x00,0x00 pair.
 * NOTE(review): presumably each pair is the two-byte JIS X 0208 code for
 * one JIS X 0201 kana -- confirm in the conversion routine (not in view).
 */
775 static const unsigned char cv[]= {
776     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
777     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
778     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
779     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
780     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
781     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
782     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
783     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
784     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
785     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
786     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
787     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
788     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
789     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
790     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
791     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
792     0x00,0x00};
793
794
795 /* X0201 kana conversion table for dakuten (voiced sound mark) */
796 /* 90-9F A0-DF */
/*
 * Same pair layout and length as cv.
 * NOTE(review): a 0x00,0x00 pair presumably means "no voiced form for
 * this kana" -- confirm in the conversion routine (not in view).
 */
797 static const unsigned char dv[]= {
798     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
800     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
801     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
802     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
803     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
804     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
805     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
806     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
807     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
808     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
809     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
810     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
812     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814     0x00,0x00};
815
816 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
817 /* 90-9F A0-DF */
/*
 * Same pair layout and length as cv and dv; only five pairs are non-zero.
 * NOTE(review): a 0x00,0x00 pair presumably means "no semi-voiced form
 * for this kana" -- confirm in the conversion routine (not in view).
 */
818 static const unsigned char ev[]= {
819     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
822     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
830     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
831     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
832     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
833     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835     0x00,0x00};
836
837
/* X0208 kigou (symbol) conversion table */
/* 0x8140 - 0x819e */
/*
 * 96 single-byte entries.  Non-zero values are ASCII codes (shown at the
 * end of each row); 0x00 presumably marks a symbol without an ASCII
 * counterpart -- the lookup code is outside this part of the file.
 */
static const unsigned char fv[] = {
    /* +0x00 */ 0x00, 0x00, 0x00, 0x00, 0x2c, 0x2e, 0x00, 0x3a,   /* , . :       */
    /* +0x08 */ 0x3b, 0x3f, 0x21, 0x00, 0x00, 0x27, 0x60, 0x00,   /* ; ? ! ' `   */
    /* +0x10 */ 0x5e, 0x00, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00,   /* ^ _         */
    /* +0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x2f,   /* - and slash */
    /* +0x20 */ 0x5c, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x60, 0x27,   /* \ | ` '     */
    /* +0x28 */ 0x22, 0x22, 0x28, 0x29, 0x00, 0x00, 0x5b, 0x5d,   /* " " ( ) [ ] */
    /* +0x30 */ 0x7b, 0x7d, 0x3c, 0x3e, 0x00, 0x00, 0x00, 0x00,   /* { } < >     */
    /* +0x38 */ 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2d, 0x00, 0x00,   /* + -         */
    /* +0x40 */ 0x00, 0x3d, 0x00, 0x3c, 0x3e, 0x00, 0x00, 0x00,   /* = < >       */
    /* +0x48 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* +0x50 */ 0x24, 0x00, 0x00, 0x25, 0x23, 0x26, 0x2a, 0x40,   /* $ % # & * @ */
    /* +0x58 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
855
856
857
858 static int             file_out_f = FALSE;
859 #ifdef OVERWRITE
860 static int             overwrite_f = FALSE;
861 static int             preserve_time_f = FALSE;
862 static int             backup_f = FALSE;
863 static char            *backup_suffix = "";
864 static char *get_backup_filename(const char *suffix, const char *filename);
865 #endif
866
867 static int nlmode_f = 0;   /* CR, LF, CRLF */
868 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
869 static nkf_char prev_cr = 0; /* CR or 0 */
870 #ifdef EASYWIN /*Easy Win */
871 static int             end_check;
872 #endif /*Easy Win */
873
874 #define STD_GC_BUFSIZE (256)
875 nkf_char std_gc_buf[STD_GC_BUFSIZE];
876 nkf_char std_gc_ndx;
877
/*
 * Return a freshly malloc'ed copy of str.
 *
 * If the allocation fails, an error is reported through perror() (with str
 * as the prefix) and a pointer to the string literal "" is returned instead;
 * that fallback must be neither modified nor freed.
 */
char* nkf_strcpy(const char *str)
{
    size_t size = strlen(str) + 1;      /* include the terminating NUL */
    char *copy = malloc(size);

    if (copy == NULL) {
        perror(str);
        return "";
    }
    memcpy(copy, str, size);
    return copy;
}
888
/*
 * Copy str into res, converting each character with nkf_toupper().
 *
 *   str    - NUL-terminated input string
 *   res    - destination buffer
 *   length - size of res in bytes, INCLUDING room for the terminating NUL
 *
 * At most length - 1 characters are copied, so the terminator always lands
 * inside res; longer input is truncated.  (The previous loop copied up to
 * `length` characters and then stored the NUL at res[length], one byte past
 * a buffer of `length` bytes.)  Nothing is written when length is 0.
 */
static void nkf_str_upcase(const char *str, char *res, size_t length)
{
    size_t i = 0;

    if (length == 0)
        return;
    for (; i + 1 < length && str[i]; i++) {
        res[i] = nkf_toupper(str[i]);
    }
    res[i] = 0;
}
897
898 static int nkf_enc_find_index(const char *name)
899 {
900     int i, index = -1;
901     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
902         if (strcmp(name, encoding_name_to_id_table[i].name) == 0) {
903             return encoding_name_to_id_table[i].id;
904         }
905     }
906     return index;
907 }
908
909 #if defined(PERL_XS) || defined(WIN32DLL)
910 static char* nkf_enc_name(const int index)
911 {
912     int i;
913     const char* name = "ASCII";
914     for (i = 0; encoding_id_to_name_table[i].id >= 0; i++) {
915         if (encoding_id_to_name_table[i].id == index) {
916             return nkf_strcpy(encoding_id_to_name_table[i].name);
917         }
918     }
919     return nkf_strcpy(name);
920 }
921 #endif
922
923 #ifdef WIN32DLL
924 #include "nkf32dll.c"
925 #elif defined(PERL_XS)
926 #else /* WIN32DLL */
927 int main(int argc, char **argv)
928 {
929     FILE  *fin;
930     unsigned char  *cp;
931
932     char *outfname = NULL;
933     char *origfname;
934
935 #ifdef EASYWIN /*Easy Win */
936     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
937 #endif
938
939     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
940         cp = (unsigned char *)*argv;
941         options(cp);
942         if (guess_f) {
943 #ifdef CHECK_OPTION
944             int debug_f_back = debug_f;
945 #endif
946 #ifdef EXEC_IO
947             int exec_f_back = exec_f;
948 #endif
949 #ifdef X0212_ENABLE
950             int x0212_f_back = x0212_f;
951 #endif
952             int x0213_f_back = x0213_f;
953             int guess_f_back = guess_f;
954             reinit();
955             guess_f = guess_f_back;
956             mime_f = FALSE;
957 #ifdef CHECK_OPTION
958             debug_f = debug_f_back;
959 #endif
960 #ifdef EXEC_IO
961             exec_f = exec_f_back;
962 #endif
963 #ifdef X0212_ENABLE
964             x0212_f = x0212_f_back;
965 #endif
966             x0213_f = x0213_f_back;
967         }
968 #ifdef EXEC_IO
969         if (exec_f){
970             int fds[2], pid;
971             if (pipe(fds) < 0 || (pid = fork()) < 0){
972                 abort();
973             }
974             if (pid == 0){
975                 if (exec_f > 0){
976                     close(fds[0]);
977                     dup2(fds[1], 1);
978                 }else{
979                     close(fds[1]);
980                     dup2(fds[0], 0);
981                 }
982                 execvp(argv[1], &argv[1]);
983             }
984             if (exec_f > 0){
985                 close(fds[1]);
986                 dup2(fds[0], 0);
987             }else{
988                 close(fds[0]);
989                 dup2(fds[1], 1);
990             }
991             argc = 0;
992             break;
993         }
994 #endif
995     }
996
997     if (binmode_f == TRUE)
998 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
999     if (freopen("","wb",stdout) == NULL)
1000         return (-1);
1001 #else
1002     setbinmode(stdout);
1003 #endif
1004
1005     if (unbuf_f)
1006       setbuf(stdout, (char *) NULL);
1007     else
1008       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
1009
1010     if (argc == 0) {
1011       if (binmode_f == TRUE)
1012 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1013       if (freopen("","rb",stdin) == NULL) return (-1);
1014 #else
1015       setbinmode(stdin);
1016 #endif
1017       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
1018       if (nop_f)
1019           noconvert(stdin);
1020       else {
1021           kanji_convert(stdin);
1022           if (guess_f) print_guessed_code(NULL);
1023       }
1024     } else {
1025       int nfiles = argc;
1026         int is_argument_error = FALSE;
1027       while (argc--) {
1028             input_codename = NULL;
1029             input_newline = 0;
1030 #ifdef CHECK_OPTION
1031             iconv_for_check = 0;
1032 #endif
1033           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
1034               perror(*--argv);
1035                 *argv++;
1036                 is_argument_error = TRUE;
1037                 continue;
1038           } else {
1039 #ifdef OVERWRITE
1040               int fd = 0;
1041               int fd_backup = 0;
1042 #endif
1043
1044 /* reopen file for stdout */
1045               if (file_out_f == TRUE) {
1046 #ifdef OVERWRITE
1047                   if (overwrite_f){
1048                       outfname = malloc(strlen(origfname)
1049                                         + strlen(".nkftmpXXXXXX")
1050                                         + 1);
1051                       if (!outfname){
1052                           perror(origfname);
1053                           return -1;
1054                       }
1055                       strcpy(outfname, origfname);
1056 #ifdef MSDOS
1057                       {
1058                           int i;
1059                           for (i = strlen(outfname); i; --i){
1060                               if (outfname[i - 1] == '/'
1061                                   || outfname[i - 1] == '\\'){
1062                                   break;
1063                               }
1064                           }
1065                           outfname[i] = '\0';
1066                       }
1067                       strcat(outfname, "ntXXXXXX");
1068                       mktemp(outfname);
1069                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
1070                                 S_IREAD | S_IWRITE);
1071 #else
1072                       strcat(outfname, ".nkftmpXXXXXX");
1073                       fd = mkstemp(outfname);
1074 #endif
1075                       if (fd < 0
1076                           || (fd_backup = dup(fileno(stdout))) < 0
1077                           || dup2(fd, fileno(stdout)) < 0
1078                           ){
1079                           perror(origfname);
1080                           return -1;
1081                       }
1082                   }else
1083 #endif
1084                   if(argc == 1) {
1085                       outfname = *argv++;
1086                       argc--;
1087                   } else {
1088                       outfname = "nkf.out";
1089                   }
1090
1091                   if(freopen(outfname, "w", stdout) == NULL) {
1092                       perror (outfname);
1093                       return (-1);
1094                   }
1095                   if (binmode_f == TRUE) {
1096 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1097                       if (freopen("","wb",stdout) == NULL)
1098                            return (-1);
1099 #else
1100                       setbinmode(stdout);
1101 #endif
1102                   }
1103               }
1104               if (binmode_f == TRUE)
1105 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
1106                  if (freopen("","rb",fin) == NULL)
1107                     return (-1);
1108 #else
1109                  setbinmode(fin);
1110 #endif
1111               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
1112               if (nop_f)
1113                   noconvert(fin);
1114               else {
1115                   char *filename = NULL;
1116                   kanji_convert(fin);
1117                   if (nfiles > 1) filename = origfname;
1118                   if (guess_f) print_guessed_code(filename);
1119               }
1120               fclose(fin);
1121 #ifdef OVERWRITE
1122               if (overwrite_f) {
1123                   struct stat     sb;
1124 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1125                   time_t tb[2];
1126 #else
1127                   struct utimbuf  tb;
1128 #endif
1129
1130                   fflush(stdout);
1131                   close(fd);
1132                   if (dup2(fd_backup, fileno(stdout)) < 0){
1133                       perror("dup2");
1134                   }
1135                   if (stat(origfname, &sb)) {
1136                       fprintf(stderr, "Can't stat %s\n", origfname);
1137                   }
1138                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
1139                   if (chmod(outfname, sb.st_mode)) {
1140                       fprintf(stderr, "Can't set permission %s\n", outfname);
1141                   }
1142
1143                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
1144                     if(preserve_time_f){
1145 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
1146                         tb[0] = tb[1] = sb.st_mtime;
1147                         if (utime(outfname, tb)) {
1148                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1149                         }
1150 #else
1151                         tb.actime  = sb.st_atime;
1152                         tb.modtime = sb.st_mtime;
1153                         if (utime(outfname, &tb)) {
1154                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
1155                         }
1156 #endif
1157                     }
1158                     if(backup_f){
1159                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
1160 #ifdef MSDOS
1161                         unlink(backup_filename);
1162 #endif
1163                         if (rename(origfname, backup_filename)) {
1164                             perror(backup_filename);
1165                             fprintf(stderr, "Can't rename %s to %s\n",
1166                                     origfname, backup_filename);
1167                         }
1168                     }else{
1169 #ifdef MSDOS
1170                         if (unlink(origfname)){
1171                             perror(origfname);
1172                         }
1173 #endif
1174                     }
1175                   if (rename(outfname, origfname)) {
1176                       perror(origfname);
1177                       fprintf(stderr, "Can't rename %s to %s\n",
1178                               outfname, origfname);
1179                   }
1180                   free(outfname);
1181               }
1182 #endif
1183           }
1184       }
1185         if (is_argument_error)
1186             return(-1);
1187     }
1188 #ifdef EASYWIN /*Easy Win */
1189     if (file_out_f == FALSE)
1190         scanf("%d",&end_check);
1191     else
1192         fclose(stdout);
1193 #else /* for Other OS */
1194     if (file_out_f == TRUE)
1195         fclose(stdout);
1196 #endif /*Easy Win */
1197     return (0);
1198 }
1199 #endif /* WIN32DLL */
1200
1201 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the pattern `suffix`.
 *
 *   - If suffix contains no '*', the result is filename followed by suffix
 *     (e.g. suffix ".bak", filename "a.txt" -> "a.txt.bak").
 *   - Otherwise every '*' in suffix is replaced by filename
 *     (e.g. suffix "old/*~", filename "a.txt" -> "old/a.txt~").
 *
 * Returns a malloc'ed string that the caller must free(), or NULL (after
 * reporting through perror) when the allocation fails.
 *
 * The no-asterisk branch used to call malloc( + 1), i.e. allocate a single
 * byte, and then strcpy/strcat the whole name into it; both branches now
 * share one correctly sized, checked allocation.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    size_t filename_length = strlen(filename);
    size_t suffix_length;
    size_t asterisk_count = 0;
    size_t total;
    size_t i, j;
    char *backup_filename;

    for (i = 0; suffix[i]; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }
    suffix_length = i;

    if (asterisk_count == 0) {
        total = filename_length + suffix_length;
    } else {
        /* each '*' is dropped and replaced by one copy of filename */
        total = (suffix_length - asterisk_count)
            + asterisk_count * filename_length;
    }

    backup_filename = malloc(total + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count == 0) {
        memcpy(backup_filename, filename, filename_length);
        /* + 1 copies the suffix's terminating NUL as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    for (i = 0, j = 0; suffix[i]; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1240 #endif
1241
/*
 * Long ("--name") option table consulted by options().
 *
 * name  : option text after "--"; a trailing '=' means a value follows.
 * alias : string of short options that options() parses in place of the
 *         long option, or "" when options() handles the name itself.
 *
 * options() scans the entries in order and takes the first match, so the
 * order of the entries is significant.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",                "" },
    { "oc=",                "" },
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess=",             "" },
    { "guess",              "g1" },
    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/* Set to 1 once a bare "--" has been seen; options() then ignores
 * every later argument. */
static int option_mode = 0;
1323
1324 void options(unsigned char *cp)
1325 {
1326     nkf_char i, j;
1327     unsigned char *p;
1328     unsigned char *cp_back = NULL;
1329     char codeset[32];
1330
1331     if (option_mode==1)
1332         return;
1333     while(*cp && *cp++!='-');
1334     while (*cp || cp_back) {
1335         if(!*cp){
1336             cp = cp_back;
1337             cp_back = NULL;
1338             continue;
1339         }
1340         p = 0;
1341         switch (*cp++) {
1342         case '-':  /* literal options */
1343             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1344                 option_mode = 1;
1345                 return;
1346             }
1347             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1348                 p = (unsigned char *)long_option[i].name;
1349                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1350                 if (*p == cp[j] || cp[j] == SP){
1351                     p = &cp[j] + 1;
1352                     break;
1353                 }
1354                 p = 0;
1355             }
1356             if (p == 0) {
1357                 fprintf(stderr, "unknown long option: --%s\n", cp);
1358                 return;
1359             }
1360             while(*cp && *cp != SP && cp++);
1361             if (long_option[i].alias[0]){
1362                 cp_back = cp;
1363                 cp = (unsigned char *)long_option[i].alias;
1364             }else{
1365                 if (strcmp(long_option[i].name, "ic=") == 0){
1366                     nkf_str_upcase(p, codeset, 32);
1367                     i = nkf_enc_find_index(codeset);
1368                     switch (i) {
1369                     case ISO_2022_JP:
1370                         input_f = JIS_INPUT;
1371                         break;
1372                     case CP50220:
1373                     case CP50221:
1374                     case CP50222:
1375                         input_f = JIS_INPUT;
1376 #ifdef SHIFTJIS_CP932
1377                         cp51932_f = TRUE;
1378 #endif
1379 #ifdef UTF8_OUTPUT_ENABLE
1380                         ms_ucs_map_f = UCS_MAP_CP932;
1381 #endif
1382                         break;
1383                     case ISO_2022_JP_1:
1384                         input_f = JIS_INPUT;
1385 #ifdef X0212_ENABLE
1386                         x0212_f = TRUE;
1387 #endif
1388                         break;
1389                     case ISO_2022_JP_3:
1390                         input_f = JIS_INPUT;
1391 #ifdef X0212_ENABLE
1392                         x0212_f = TRUE;
1393 #endif
1394                         x0213_f = TRUE;
1395                         break;
1396                     case SHIFT_JIS:
1397                         input_f = SJIS_INPUT;
1398                         break;
1399                     case WINDOWS_31J:
1400                         input_f = SJIS_INPUT;
1401 #ifdef SHIFTJIS_CP932
1402                         cp51932_f = TRUE;
1403 #endif
1404 #ifdef UTF8_OUTPUT_ENABLE
1405                         ms_ucs_map_f = UCS_MAP_CP932;
1406 #endif
1407                         break;
1408                     case CP10001:
1409                         input_f = SJIS_INPUT;
1410 #ifdef SHIFTJIS_CP932
1411                         cp51932_f = TRUE;
1412 #endif
1413 #ifdef UTF8_OUTPUT_ENABLE
1414                         ms_ucs_map_f = UCS_MAP_CP10001;
1415 #endif
1416                         break;
1417                     case EUC_JP:
1418                         input_f = EUC_INPUT;
1419                         break;
1420                     case CP51932:
1421                         input_f = EUC_INPUT;
1422 #ifdef SHIFTJIS_CP932
1423                         cp51932_f = TRUE;
1424 #endif
1425 #ifdef UTF8_OUTPUT_ENABLE
1426                         ms_ucs_map_f = UCS_MAP_CP932;
1427 #endif
1428                         break;
1429                     case EUCJP_MS:
1430                         input_f = EUC_INPUT;
1431 #ifdef SHIFTJIS_CP932
1432                         cp51932_f = FALSE;
1433 #endif
1434 #ifdef UTF8_OUTPUT_ENABLE
1435                         ms_ucs_map_f = UCS_MAP_MS;
1436 #endif
1437                         break;
1438                     case EUCJP_ASCII:
1439                         input_f = EUC_INPUT;
1440 #ifdef SHIFTJIS_CP932
1441                         cp51932_f = FALSE;
1442 #endif
1443 #ifdef UTF8_OUTPUT_ENABLE
1444                         ms_ucs_map_f = UCS_MAP_ASCII;
1445 #endif
1446                         break;
1447                     case SHIFT_JISX0213:
1448                     case SHIFT_JIS_2004:
1449                         input_f = SJIS_INPUT;
1450                         x0213_f = TRUE;
1451 #ifdef SHIFTJIS_CP932
1452                         cp51932_f = FALSE;
1453 #endif
1454                         break;
1455                     case EUC_JISX0213:
1456                     case EUC_JIS_2004:
1457                         input_f = EUC_INPUT;
1458                         x0213_f = TRUE;
1459 #ifdef SHIFTJIS_CP932
1460                         cp51932_f = FALSE;
1461 #endif
1462                         break;
1463 #ifdef UTF8_INPUT_ENABLE
1464                     case UTF_8:
1465                     case UTF_8N:
1466                     case UTF_8_BOM:
1467                         input_f = UTF8_INPUT;
1468                         break;
1469 #ifdef UNICODE_NORMALIZATION
1470                     case UTF8_MAC:
1471                         input_f = UTF8_INPUT;
1472                         nfc_f = TRUE;
1473                         break;
1474 #endif
1475                     case UTF_16:
1476                     case UTF_16BE:
1477                     case UTF_16BE_BOM:
1478                         input_f = UTF16_INPUT;
1479                         input_endian = ENDIAN_BIG;
1480                         break;
1481                     case UTF_16LE:
1482                     case UTF_16LE_BOM:
1483                         input_f = UTF16_INPUT;
1484                         input_endian = ENDIAN_LITTLE;
1485                         break;
1486                     case UTF_32:
1487                     case UTF_32BE:
1488                     case UTF_32BE_BOM:
1489                         input_f = UTF32_INPUT;
1490                         input_endian = ENDIAN_BIG;
1491                         break;
1492                     case UTF_32LE:
1493                     case UTF_32LE_BOM:
1494                         input_f = UTF32_INPUT;
1495                         input_endian = ENDIAN_LITTLE;
1496                         break;
1497 #endif
1498                     default:
1499                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1500                         break;
1501                     }
1502                     continue;
1503                 }
1504                 if (strcmp(long_option[i].name, "oc=") == 0){
1505                     nkf_str_upcase(p, codeset, 32);
1506                     output_encoding = nkf_enc_find_index(codeset);
1507                     x0201_f = FALSE;
1508                     switch (output_encoding) {
1509                     case ISO_2022_JP:
1510                         output_conv = j_oconv;
1511                         break;
1512                     case CP50220:
1513                             output_conv = j_oconv;
1514                             x0201_f = TRUE;
1515 #ifdef SHIFTJIS_CP932
1516                             cp932inv_f = FALSE;
1517 #endif
1518 #ifdef UTF8_OUTPUT_ENABLE
1519                             ms_ucs_map_f = UCS_MAP_CP932;
1520 #endif
1521                         break;
1522                     case CP50221:
1523                         output_conv = j_oconv;
1524 #ifdef SHIFTJIS_CP932
1525                         cp932inv_f = FALSE;
1526 #endif
1527 #ifdef UTF8_OUTPUT_ENABLE
1528                         ms_ucs_map_f = UCS_MAP_CP932;
1529 #endif
1530                         break;
1531                     case ISO_2022_JP_1:
1532                         output_conv = j_oconv;
1533 #ifdef X0212_ENABLE
1534                         x0212_f = TRUE;
1535 #endif
1536 #ifdef SHIFTJIS_CP932
1537                         cp932inv_f = FALSE;
1538 #endif
1539                         break;
1540                     case ISO_2022_JP_3:
1541                         output_conv = j_oconv;
1542 #ifdef X0212_ENABLE
1543                         x0212_f = TRUE;
1544 #endif
1545                         x0213_f = TRUE;
1546 #ifdef SHIFTJIS_CP932
1547                         cp932inv_f = FALSE;
1548 #endif
1549                         break;
1550                     case SHIFT_JIS:
1551                         output_conv = s_oconv;
1552                         break;
1553                     case WINDOWS_31J:
1554                         output_conv = s_oconv;
1555 #ifdef UTF8_OUTPUT_ENABLE
1556                         ms_ucs_map_f = UCS_MAP_CP932;
1557 #endif
1558                         break;
1559                     case CP10001:
1560                         output_conv = s_oconv;
1561 #ifdef UTF8_OUTPUT_ENABLE
1562                         ms_ucs_map_f = UCS_MAP_CP10001;
1563 #endif
1564                         break;
1565                     case EUC_JP:
1566                         output_conv = e_oconv;
1567                         break;
1568                     case CP51932:
1569                         output_conv = e_oconv;
1570 #ifdef SHIFTJIS_CP932
1571                         cp932inv_f = FALSE;
1572 #endif
1573 #ifdef UTF8_OUTPUT_ENABLE
1574                         ms_ucs_map_f = UCS_MAP_CP932;
1575 #endif
1576                         break;
1577                     case EUCJP_MS:
1578                         output_conv = e_oconv;
1579 #ifdef X0212_ENABLE
1580                         x0212_f = TRUE;
1581 #endif
1582 #ifdef UTF8_OUTPUT_ENABLE
1583                         ms_ucs_map_f = UCS_MAP_MS;
1584 #endif
1585                         break;
1586                     case EUCJP_ASCII:
1587                         output_conv = e_oconv;
1588 #ifdef X0212_ENABLE
1589                         x0212_f = TRUE;
1590 #endif
1591 #ifdef UTF8_OUTPUT_ENABLE
1592                         ms_ucs_map_f = UCS_MAP_ASCII;
1593 #endif
1594                         break;
1595                     case SHIFT_JISX0213:
1596                     case SHIFT_JIS_2004:
1597                             output_conv = s_oconv;
1598                             x0213_f = TRUE;
1599 #ifdef SHIFTJIS_CP932
1600                             cp932inv_f = FALSE;
1601 #endif
1602                         break;
1603                     case EUC_JISX0213:
1604                     case EUC_JIS_2004:
1605                         output_conv = e_oconv;
1606 #ifdef X0212_ENABLE
1607                         x0212_f = TRUE;
1608 #endif
1609                         x0213_f = TRUE;
1610 #ifdef SHIFTJIS_CP932
1611                         cp932inv_f = FALSE;
1612 #endif
1613                         break;
1614 #ifdef UTF8_OUTPUT_ENABLE
1615                     case UTF_8:
1616                     case UTF_8N:
1617                         output_conv = w_oconv;
1618                         break;
1619                     case UTF_8_BOM:
1620                         output_conv = w_oconv;
1621                         output_bom_f = TRUE;
1622                         break;
1623                     case UTF_16BE:
1624                         output_conv = w_oconv16;
1625                         break;
1626                     case UTF_16:
1627                     case UTF_16BE_BOM:
1628                         output_conv = w_oconv16;
1629                         output_bom_f = TRUE;
1630                         break;
1631                     case UTF_16LE:
1632                         output_conv = w_oconv16;
1633                         output_endian = ENDIAN_LITTLE;
1634                         break;
1635                     case UTF_16LE_BOM:
1636                         output_conv = w_oconv16;
1637                         output_endian = ENDIAN_LITTLE;
1638                         output_bom_f = TRUE;
1639                         break;
1640                     case UTF_32:
1641                     case UTF_32BE:
1642                         output_conv = w_oconv32;
1643                         break;
1644                     case UTF_32BE_BOM:
1645                         output_conv = w_oconv32;
1646                         output_bom_f = TRUE;
1647                         break;
1648                     case UTF_32LE:
1649                         output_conv = w_oconv32;
1650                         output_endian = ENDIAN_LITTLE;
1651                         break;
1652                     case UTF_32LE_BOM:
1653                         output_conv = w_oconv32;
1654                         output_endian = ENDIAN_LITTLE;
1655                         output_bom_f = TRUE;
1656                         break;
1657 #endif
1658                     default:
1659                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1660                         break;
1661                     }
1662                     continue;
1663                 }
1664                 if (strcmp(long_option[i].name, "guess=") == 0){
1665                     if (p[0] == '1') {
1666                         guess_f = 2;
1667                     } else {
1668                         guess_f = 1;
1669                     }
1670                     continue;
1671                 }
1672 #ifdef OVERWRITE
1673                 if (strcmp(long_option[i].name, "overwrite") == 0){
1674                     file_out_f = TRUE;
1675                     overwrite_f = TRUE;
1676                     preserve_time_f = TRUE;
1677                     continue;
1678                 }
1679                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1680                     file_out_f = TRUE;
1681                     overwrite_f = TRUE;
1682                     preserve_time_f = TRUE;
1683                     backup_f = TRUE;
1684                     backup_suffix = malloc(strlen((char *) p) + 1);
1685                     strcpy(backup_suffix, (char *) p);
1686                     continue;
1687                 }
1688                 if (strcmp(long_option[i].name, "in-place") == 0){
1689                     file_out_f = TRUE;
1690                     overwrite_f = TRUE;
1691                     preserve_time_f = FALSE;
1692                     continue;
1693                 }
1694                 if (strcmp(long_option[i].name, "in-place=") == 0){
1695                     file_out_f = TRUE;
1696                     overwrite_f = TRUE;
1697                     preserve_time_f = FALSE;
1698                     backup_f = TRUE;
1699                     backup_suffix = malloc(strlen((char *) p) + 1);
1700                     strcpy(backup_suffix, (char *) p);
1701                     continue;
1702                 }
1703 #endif
1704 #ifdef INPUT_OPTION
1705                 if (strcmp(long_option[i].name, "cap-input") == 0){
1706                     cap_f = TRUE;
1707                     continue;
1708                 }
1709                 if (strcmp(long_option[i].name, "url-input") == 0){
1710                     url_f = TRUE;
1711                     continue;
1712                 }
1713 #endif
1714 #ifdef NUMCHAR_OPTION
1715                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1716                     numchar_f = TRUE;
1717                     continue;
1718                 }
1719 #endif
1720 #ifdef CHECK_OPTION
1721                 if (strcmp(long_option[i].name, "no-output") == 0){
1722                     noout_f = TRUE;
1723                     continue;
1724                 }
1725                 if (strcmp(long_option[i].name, "debug") == 0){
1726                     debug_f = TRUE;
1727                     continue;
1728                 }
1729 #endif
1730                 if (strcmp(long_option[i].name, "cp932") == 0){
1731 #ifdef SHIFTJIS_CP932
1732                     cp51932_f = TRUE;
1733                     cp932inv_f = TRUE;
1734 #endif
1735 #ifdef UTF8_OUTPUT_ENABLE
1736                     ms_ucs_map_f = UCS_MAP_CP932;
1737 #endif
1738                     continue;
1739                 }
1740                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1741 #ifdef SHIFTJIS_CP932
1742                     cp51932_f = FALSE;
1743                     cp932inv_f = FALSE;
1744 #endif
1745 #ifdef UTF8_OUTPUT_ENABLE
1746                     ms_ucs_map_f = UCS_MAP_ASCII;
1747 #endif
1748                     continue;
1749                 }
1750 #ifdef SHIFTJIS_CP932
1751                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1752                     cp932inv_f = TRUE;
1753                     continue;
1754                 }
1755 #endif
1756
1757 #ifdef X0212_ENABLE
1758                 if (strcmp(long_option[i].name, "x0212") == 0){
1759                     x0212_f = TRUE;
1760                     continue;
1761                 }
1762 #endif
1763
1764 #ifdef EXEC_IO
1765                   if (strcmp(long_option[i].name, "exec-in") == 0){
1766                       exec_f = 1;
1767                       return;
1768                   }
1769                   if (strcmp(long_option[i].name, "exec-out") == 0){
1770                       exec_f = -1;
1771                       return;
1772                   }
1773 #endif
1774 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1775                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1776                     no_cp932ext_f = TRUE;
1777                     continue;
1778                 }
1779                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1780                     no_best_fit_chars_f = TRUE;
1781                     continue;
1782                 }
1783                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1784                     encode_fallback = NULL;
1785                     continue;
1786                 }
1787                 if (strcmp(long_option[i].name, "fb-html") == 0){
1788                     encode_fallback = encode_fallback_html;
1789                     continue;
1790                 }
1791                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1792                     encode_fallback = encode_fallback_xml;
1793                     continue;
1794                 }
1795                 if (strcmp(long_option[i].name, "fb-java") == 0){
1796                     encode_fallback = encode_fallback_java;
1797                     continue;
1798                 }
1799                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1800                     encode_fallback = encode_fallback_perl;
1801                     continue;
1802                 }
1803                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1804                     encode_fallback = encode_fallback_subchar;
1805                     continue;
1806                 }
1807                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1808                     encode_fallback = encode_fallback_subchar;
1809                     unicode_subchar = 0;
1810                     if (p[0] != '0'){
1811                         /* decimal number */
1812                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1813                             unicode_subchar *= 10;
1814                             unicode_subchar += hex2bin(p[i]);
1815                         }
1816                     }else if(p[1] == 'x' || p[1] == 'X'){
1817                         /* hexadecimal number */
1818                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1819                             unicode_subchar <<= 4;
1820                             unicode_subchar |= hex2bin(p[i]);
1821                         }
1822                     }else{
1823                         /* octal number */
1824                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1825                             unicode_subchar *= 8;
1826                             unicode_subchar += hex2bin(p[i]);
1827                         }
1828                     }
1829                     w16e_conv(unicode_subchar, &i, &j);
1830                     unicode_subchar = i<<8 | j;
1831                     continue;
1832                 }
1833 #endif
1834 #ifdef UTF8_OUTPUT_ENABLE
1835                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1836                     ms_ucs_map_f = UCS_MAP_MS;
1837                     continue;
1838                 }
1839 #endif
1840 #ifdef UNICODE_NORMALIZATION
1841                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1842                     input_f = UTF8_INPUT;
1843                     nfc_f = TRUE;
1844                     continue;
1845                 }
1846 #endif
1847                 if (strcmp(long_option[i].name, "prefix=") == 0){
1848                     if (nkf_isgraph(p[0])){
1849                         for (i = 1; nkf_isgraph(p[i]); i++){
1850                             prefix_table[p[i]] = p[0];
1851                         }
1852                     }
1853                     continue;
1854                 }
1855             }
1856             continue;
1857         case 'b':           /* buffered mode */
1858             unbuf_f = FALSE;
1859             continue;
1860         case 'u':           /* non bufferd mode */
1861             unbuf_f = TRUE;
1862             continue;
1863         case 't':           /* transparent mode */
1864             if (*cp=='1') {
1865                 /* alias of -t */
1866                 nop_f = TRUE;
1867                 *cp++;
1868             } else if (*cp=='2') {
1869                 /*
1870                  * -t with put/get
1871                  *
1872                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1873                  *
1874                  */
1875                 nop_f = 2;
1876                 *cp++;
1877             } else
1878                 nop_f = TRUE;
1879             continue;
1880         case 'j':           /* JIS output */
1881         case 'n':
1882             output_conv = j_oconv;
1883             output_encoding = ISO_2022_JP;
1884             continue;
1885         case 'e':           /* AT&T EUC output */
1886             output_conv = e_oconv;
1887             cp932inv_f = FALSE;
1888             output_encoding = EUC_JP;
1889             continue;
1890         case 's':           /* SJIS output */
1891             output_conv = s_oconv;
1892             output_encoding = SHIFT_JIS;
1893             continue;
1894         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1895             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1896             input_f = LATIN1_INPUT;
1897             continue;
1898         case 'i':           /* Kanji IN ESC-$-@/B */
1899             if (*cp=='@'||*cp=='B')
1900                 kanji_intro = *cp++;
1901             continue;
1902         case 'o':           /* ASCII IN ESC-(-J/B */
1903             if (*cp=='J'||*cp=='B'||*cp=='H')
1904                 ascii_intro = *cp++;
1905             continue;
1906         case 'h':
1907             /*
1908                 bit:1   katakana->hiragana
1909                 bit:2   hiragana->katakana
1910             */
1911             if ('9'>= *cp && *cp>='0')
1912                 hira_f |= (*cp++ -'0');
1913             else
1914                 hira_f |= 1;
1915             continue;
1916         case 'r':
1917             rot_f = TRUE;
1918             continue;
1919 #if defined(MSDOS) || defined(__OS2__)
1920         case 'T':
1921             binmode_f = FALSE;
1922             continue;
1923 #endif
1924 #ifndef PERL_XS
1925         case 'V':
1926             show_configuration();
1927             exit(1);
1928             break;
1929         case 'v':
1930             usage();
1931             exit(1);
1932             break;
1933 #endif
1934 #ifdef UTF8_OUTPUT_ENABLE
1935         case 'w':           /* UTF-8 output */
1936             if (cp[0] == '8') {
1937                 output_conv = w_oconv; cp++;
1938                 if (cp[0] == '0'){
1939                     cp++;
1940                     output_encoding = UTF_8N;
1941                 } else {
1942                     output_bom_f = TRUE;
1943                     output_encoding = UTF_8_BOM;
1944                 }
1945             } else {
1946                 if ('1'== cp[0] && '6'==cp[1]) {
1947                     output_conv = w_oconv16; cp+=2;
1948                     output_encoding = UTF_16;
1949                 } else if ('3'== cp[0] && '2'==cp[1]) {
1950                     output_conv = w_oconv32; cp+=2;
1951                     output_encoding = UTF_32;
1952                 } else {
1953                     output_conv = w_oconv;
1954                     output_encoding = UTF_8;
1955                     continue;
1956                 }
1957                 if (cp[0]=='L') {
1958                     cp++;
1959                     output_endian = ENDIAN_LITTLE;
1960                 } else if (cp[0] == 'B') {
1961                     cp++;
1962                 } else {
1963                     continue;
1964                 }
1965                 if (cp[0] == '0'){
1966                     cp++;
1967                     output_encoding = output_encoding == UTF_16
1968                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
1969                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
1970                 } else {
1971                     output_bom_f = TRUE;
1972                     output_encoding = output_encoding == UTF_16
1973                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
1974                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
1975                 }
1976             }
1977             continue;
1978 #endif
1979 #ifdef UTF8_INPUT_ENABLE
1980         case 'W':           /* UTF input */
1981             if (cp[0] == '8') {
1982                 cp++;
1983                 input_f = UTF8_INPUT;
1984             }else{
1985                 if ('1'== cp[0] && '6'==cp[1]) {
1986                     cp += 2;
1987                     input_f = UTF16_INPUT;
1988                     input_endian = ENDIAN_BIG;
1989                 } else if ('3'== cp[0] && '2'==cp[1]) {
1990                     cp += 2;
1991                     input_f = UTF32_INPUT;
1992                     input_endian = ENDIAN_BIG;
1993                 } else {
1994                     input_f = UTF8_INPUT;
1995                     continue;
1996                 }
1997                 if (cp[0]=='L') {
1998                     cp++;
1999                     input_endian = ENDIAN_LITTLE;
2000                 } else if (cp[0] == 'B') {
2001                     cp++;
2002                 }
2003             }
2004             continue;
2005 #endif
2006         /* Input code assumption */
2007         case 'J':   /* JIS input */
2008             input_f = JIS_INPUT;
2009             continue;
2010         case 'E':   /* AT&T EUC input */
2011             input_f = EUC_INPUT;
2012             continue;
2013         case 'S':   /* MS Kanji input */
2014             input_f = SJIS_INPUT;
2015             continue;
2016         case 'Z':   /* Convert X0208 alphabet to asii */
2017             /* alpha_f
2018                bit:0   Convert JIS X 0208 Alphabet to ASCII
2019                bit:1   Convert Kankaku to one space
2020                bit:2   Convert Kankaku to two spaces
2021                bit:3   Convert HTML Entity
2022                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
2023             */
2024             while ('0'<= *cp && *cp <='9') {
2025                 alpha_f |= 1 << (*cp++ - '0');
2026             }
2027             if (!alpha_f) alpha_f = 1;
2028             continue;
2029         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
2030             x0201_f = FALSE;    /* No X0201->X0208 conversion */
2031             /* accept  X0201
2032                     ESC-(-I     in JIS, EUC, MS Kanji
2033                     SI/SO       in JIS, EUC, MS Kanji
2034                     SSO         in EUC, JIS, not in MS Kanji
2035                     MS Kanji (0xa0-0xdf)
2036                output  X0201
2037                     ESC-(-I     in JIS (0x20-0x5f)
2038                     SSO         in EUC (0xa0-0xdf)
2039                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
2040             */
2041             continue;
2042         case 'X':   /* Convert X0201 kana to X0208 */
2043             x0201_f = TRUE;
2044             continue;
2045         case 'F':   /* prserve new lines */
2046             fold_preserve_f = TRUE;
2047         case 'f':   /* folding -f60 or -f */
2048             fold_f = TRUE;
2049             fold_len = 0;
2050             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2051                 fold_len *= 10;
2052                 fold_len += *cp++ - '0';
2053             }
2054             if (!(0<fold_len && fold_len<BUFSIZ))
2055                 fold_len = DEFAULT_FOLD;
2056             if (*cp=='-') {
2057                 fold_margin = 0;
2058                 cp++;
2059                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
2060                     fold_margin *= 10;
2061                     fold_margin += *cp++ - '0';
2062                 }
2063             }
2064             continue;
2065         case 'm':   /* MIME support */
2066             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
2067             if (*cp=='B'||*cp=='Q') {
2068                 mime_decode_mode = *cp++;
2069                 mimebuf_f = FIXED_MIME;
2070             } else if (*cp=='N') {
2071                 mime_f = TRUE; cp++;
2072             } else if (*cp=='S') {
2073                 mime_f = STRICT_MIME; cp++;
2074             } else if (*cp=='0') {
2075                 mime_decode_f = FALSE;
2076                 mime_f = FALSE; cp++;
2077             }
2078             continue;
2079         case 'M':   /* MIME output */
2080             if (*cp=='B') {
2081                 mimeout_mode = 'B';
2082                 mimeout_f = FIXED_MIME; cp++;
2083             } else if (*cp=='Q') {
2084                 mimeout_mode = 'Q';
2085                 mimeout_f = FIXED_MIME; cp++;
2086             } else {
2087                 mimeout_f = TRUE;
2088             }
2089             continue;
2090         case 'B':   /* Broken JIS support */
2091             /*  bit:0   no ESC JIS
2092                 bit:1   allow any x on ESC-(-x or ESC-$-x
2093                 bit:2   reset to ascii on NL
2094             */
2095             if ('9'>= *cp && *cp>='0')
2096                 broken_f |= 1<<(*cp++ -'0');
2097             else
2098                 broken_f |= TRUE;
2099             continue;
2100 #ifndef PERL_XS
2101         case 'O':/* for Output file */
2102             file_out_f = TRUE;
2103             continue;
2104 #endif
2105         case 'c':/* add cr code */
2106             nlmode_f = CRLF;
2107             continue;
2108         case 'd':/* delete cr code */
2109             nlmode_f = LF;
2110             continue;
2111         case 'I':   /* ISO-2022-JP output */
2112             iso2022jp_f = TRUE;
2113             continue;
2114         case 'L':  /* line mode */
2115             if (*cp=='u') {         /* unix */
2116                 nlmode_f = LF; cp++;
2117             } else if (*cp=='m') { /* mac */
2118                 nlmode_f = CR; cp++;
2119             } else if (*cp=='w') { /* windows */
2120                 nlmode_f = CRLF; cp++;
2121             } else if (*cp=='0') { /* no conversion  */
2122                 nlmode_f = 0; cp++;
2123             }
2124             continue;
2125 #ifndef PERL_XS
2126         case 'g':
2127             if (*cp == '1') {
2128                 guess_f = 2;
2129                 cp++;
2130             } else if (*cp == '0') {
2131                 guess_f = 1;
2132                 cp++;
2133             } else {
2134                 guess_f = 1;
2135             }
2136             continue;
2137 #endif
2138         case SP:
2139         /* module muliple options in a string are allowed for Perl moudle  */
2140             while(*cp && *cp++!='-');
2141             continue;
2142         default:
2143             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
2144             /* bogus option but ignored */
2145             continue;
2146         }
2147     }
2148 }
2149
2150 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2151 {
2152     if (iconv_func){
2153         struct input_code *p = input_code_list;
2154         while (p->name){
2155             if (iconv_func == p->iconv_func){
2156                 return p;
2157             }
2158             p++;
2159         }
2160     }
2161     return 0;
2162 }
2163
2164 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
2165 {
2166 #ifdef INPUT_CODE_FIX
2167     if (f || !input_f)
2168 #endif
2169         if (estab_f != f){
2170             estab_f = f;
2171         }
2172
2173     if (iconv_func
2174 #ifdef INPUT_CODE_FIX
2175         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
2176 #endif
2177         ){
2178         iconv = iconv_func;
2179     }
2180 #ifdef CHECK_OPTION
2181     if (estab_f && iconv_for_check != iconv){
2182         struct input_code *p = find_inputcode_byfunc(iconv);
2183         if (p){
2184             set_input_codename(p->name);
2185             debug(p->name);
2186         }
2187         iconv_for_check = iconv;
2188     }
2189 #endif
2190 }
2191
/*
 * Score flag bits.  Each flag occupies its own bit so that several of them
 * can be OR-ed into input_code.score (see set_code_score()).
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* nonexistent character */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score every detector starts from. */
#define SCORE_INIT (SCORE_iMIME)

/*
 * Score for a first byte c2 with (c2 & 0x70) == 0x20, indexed by c2 & 0x0f.
 *
 * The element type is unsigned char (not plain char) so that every SCORE_*
 * value, up to SCORE_ERROR == 0x80, is stored without being turned into a
 * negative number on platforms where char is signed.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/*
 * Score for a first byte c2 with (c2 & 0x70) == 0x70, indexed by c2 & 0x0f.
 *
 * The last entry is SCORE_ERROR (0x80); with a signed char element it would
 * read back as -128 and set every higher bit of the score when OR-ed in,
 * hence unsigned char.
 */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
2216
2217 void set_code_score(struct input_code *ptr, nkf_char score)
2218 {
2219     if (ptr){
2220         ptr->score |= score;
2221     }
2222 }
2223
2224 void clr_code_score(struct input_code *ptr, nkf_char score)
2225 {
2226     if (ptr){
2227         ptr->score &= ~score;
2228     }
2229 }
2230
2231 void code_score(struct input_code *ptr)
2232 {
2233     nkf_char c2 = ptr->buf[0];
2234 #ifdef UTF8_OUTPUT_ENABLE
2235     nkf_char c1 = ptr->buf[1];
2236 #endif
2237     if (c2 < 0){
2238         set_code_score(ptr, SCORE_ERROR);
2239     }else if (c2 == SSO){
2240         set_code_score(ptr, SCORE_KANA);
2241     }else if (c2 == 0x8f){
2242         set_code_score(ptr, SCORE_X0212);
2243 #ifdef UTF8_OUTPUT_ENABLE
2244     }else if (!e2w_conv(c2, c1)){
2245         set_code_score(ptr, SCORE_NO_EXIST);
2246 #endif
2247     }else if ((c2 & 0x70) == 0x20){
2248         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2249     }else if ((c2 & 0x70) == 0x70){
2250         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2251     }else if ((c2 & 0x70) >= 0x50){
2252         set_code_score(ptr, SCORE_L2);
2253     }
2254 }
2255
2256 void status_disable(struct input_code *ptr)
2257 {
2258     ptr->stat = -1;
2259     ptr->buf[0] = -1;
2260     code_score(ptr);
2261     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2262 }
2263
2264 void status_push_ch(struct input_code *ptr, nkf_char c)
2265 {
2266     ptr->buf[ptr->index++] = c;
2267 }
2268
2269 void status_clear(struct input_code *ptr)
2270 {
2271     ptr->stat = 0;
2272     ptr->index = 0;
2273 }
2274
2275 void status_reset(struct input_code *ptr)
2276 {
2277     status_clear(ptr);
2278     ptr->score = SCORE_INIT;
2279 }
2280
2281 void status_reinit(struct input_code *ptr)
2282 {
2283     status_reset(ptr);
2284     ptr->_file_stat = 0;
2285 }
2286
2287 void status_check(struct input_code *ptr, nkf_char c)
2288 {
2289     if (c <= DEL && estab_f){
2290         status_reset(ptr);
2291     }
2292 }
2293
2294 void s_status(struct input_code *ptr, nkf_char c)
2295 {
2296     switch(ptr->stat){
2297       case -1:
2298           status_check(ptr, c);
2299           break;
2300       case 0:
2301           if (c <= DEL){
2302               break;
2303 #ifdef NUMCHAR_OPTION
2304           }else if (is_unicode_capsule(c)){
2305               break;
2306 #endif
2307           }else if (0xa1 <= c && c <= 0xdf){
2308               status_push_ch(ptr, SSO);
2309               status_push_ch(ptr, c);
2310               code_score(ptr);
2311               status_clear(ptr);
2312           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2313               ptr->stat = 1;
2314               status_push_ch(ptr, c);
2315           }else if (0xed <= c && c <= 0xee){
2316               ptr->stat = 3;
2317               status_push_ch(ptr, c);
2318 #ifdef SHIFTJIS_CP932
2319           }else if (is_ibmext_in_sjis(c)){
2320               ptr->stat = 2;
2321               status_push_ch(ptr, c);
2322 #endif /* SHIFTJIS_CP932 */
2323 #ifdef X0212_ENABLE
2324           }else if (0xf0 <= c && c <= 0xfc){
2325               ptr->stat = 1;
2326               status_push_ch(ptr, c);
2327 #endif /* X0212_ENABLE */
2328           }else{
2329               status_disable(ptr);
2330           }
2331           break;
2332       case 1:
2333           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2334               status_push_ch(ptr, c);
2335               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2336               code_score(ptr);
2337               status_clear(ptr);
2338           }else{
2339               status_disable(ptr);
2340           }
2341           break;
2342       case 2:
2343 #ifdef SHIFTJIS_CP932
2344         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2345             status_push_ch(ptr, c);
2346             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2347                 set_code_score(ptr, SCORE_CP932);
2348                 status_clear(ptr);
2349                 break;
2350             }
2351         }
2352 #endif /* SHIFTJIS_CP932 */
2353         status_disable(ptr);
2354           break;
2355       case 3:
2356           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2357               status_push_ch(ptr, c);
2358               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2359             set_code_score(ptr, SCORE_CP932);
2360             status_clear(ptr);
2361           }else{
2362               status_disable(ptr);
2363           }
2364           break;
2365     }
2366 }
2367
2368 void e_status(struct input_code *ptr, nkf_char c)
2369 {
2370     switch (ptr->stat){
2371       case -1:
2372           status_check(ptr, c);
2373           break;
2374       case 0:
2375           if (c <= DEL){
2376               break;
2377 #ifdef NUMCHAR_OPTION
2378           }else if (is_unicode_capsule(c)){
2379               break;
2380 #endif
2381           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2382               ptr->stat = 1;
2383               status_push_ch(ptr, c);
2384 #ifdef X0212_ENABLE
2385           }else if (0x8f == c){
2386               ptr->stat = 2;
2387               status_push_ch(ptr, c);
2388 #endif /* X0212_ENABLE */
2389           }else{
2390               status_disable(ptr);
2391           }
2392           break;
2393       case 1:
2394           if (0xa1 <= c && c <= 0xfe){
2395               status_push_ch(ptr, c);
2396               code_score(ptr);
2397               status_clear(ptr);
2398           }else{
2399               status_disable(ptr);
2400           }
2401           break;
2402 #ifdef X0212_ENABLE
2403       case 2:
2404           if (0xa1 <= c && c <= 0xfe){
2405               ptr->stat = 1;
2406               status_push_ch(ptr, c);
2407           }else{
2408               status_disable(ptr);
2409           }
2410 #endif /* X0212_ENABLE */
2411     }
2412 }
2413
2414 #ifdef UTF8_INPUT_ENABLE
2415 void w_status(struct input_code *ptr, nkf_char c)
2416 {
2417     switch (ptr->stat){
2418       case -1:
2419           status_check(ptr, c);
2420           break;
2421       case 0:
2422           if (c <= DEL){
2423               break;
2424 #ifdef NUMCHAR_OPTION
2425           }else if (is_unicode_capsule(c)){
2426               break;
2427 #endif
2428           }else if (0xc0 <= c && c <= 0xdf){
2429               ptr->stat = 1;
2430               status_push_ch(ptr, c);
2431           }else if (0xe0 <= c && c <= 0xef){
2432               ptr->stat = 2;
2433               status_push_ch(ptr, c);
2434           }else if (0xf0 <= c && c <= 0xf4){
2435               ptr->stat = 3;
2436               status_push_ch(ptr, c);
2437           }else{
2438               status_disable(ptr);
2439           }
2440           break;
2441       case 1:
2442       case 2:
2443           if (0x80 <= c && c <= 0xbf){
2444               status_push_ch(ptr, c);
2445               if (ptr->index > ptr->stat){
2446                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2447                              && ptr->buf[2] == 0xbf);
2448                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2449                            &ptr->buf[0], &ptr->buf[1]);
2450                   if (!bom){
2451                       code_score(ptr);
2452                   }
2453                   status_clear(ptr);
2454               }
2455           }else{
2456               status_disable(ptr);
2457           }
2458           break;
2459       case 3:
2460         if (0x80 <= c && c <= 0xbf){
2461             if (ptr->index < ptr->stat){
2462                 status_push_ch(ptr, c);
2463             } else {
2464                 status_clear(ptr);
2465             }
2466           }else{
2467               status_disable(ptr);
2468           }
2469           break;
2470     }
2471 }
2472 #endif
2473
2474 void code_status(nkf_char c)
2475 {
2476     int action_flag = 1;
2477     struct input_code *result = 0;
2478     struct input_code *p = input_code_list;
2479     while (p->name){
2480         if (!p->status_func) {
2481             ++p;
2482             continue;
2483         }
2484         if (!p->status_func)
2485             continue;
2486         (p->status_func)(p, c);
2487         if (p->stat > 0){
2488             action_flag = 0;
2489         }else if(p->stat == 0){
2490             if (result){
2491                 action_flag = 0;
2492             }else{
2493                 result = p;
2494             }
2495         }
2496         ++p;
2497     }
2498
2499     if (action_flag){
2500         if (result && !estab_f){
2501             set_iconv(TRUE, result->iconv_func);
2502         }else if (c <= DEL){
2503             struct input_code *ptr = input_code_list;
2504             while (ptr->name){
2505                 status_reset(ptr);
2506                 ++ptr;
2507             }
2508         }
2509     }
2510 }
2511
2512 #ifndef WIN32DLL
2513 nkf_char std_getc(FILE *f)
2514 {
2515     if (std_gc_ndx){
2516         return std_gc_buf[--std_gc_ndx];
2517     }
2518     return getc(f);
2519 }
2520 #endif /*WIN32DLL*/
2521
2522 nkf_char std_ungetc(nkf_char c, FILE *f)
2523 {
2524     if (std_gc_ndx == STD_GC_BUFSIZE){
2525         return EOF;
2526     }
2527     std_gc_buf[std_gc_ndx++] = c;
2528     return c;
2529 }
2530
2531 #ifndef WIN32DLL
2532 void std_putc(nkf_char c)
2533 {
2534     if(c!=EOF)
2535       putchar(c);
2536 }
2537 #endif /*WIN32DLL*/
2538
2539 #if !defined(PERL_XS) && !defined(WIN32DLL)
2540 nkf_char noconvert(FILE *f)
2541 {
2542     nkf_char    c;
2543
2544     if (nop_f == 2)
2545         module_connection();
2546     while ((c = (*i_getc)(f)) != EOF)
2547       (*o_putc)(c);
2548     (*o_putc)(EOF);
2549     return 1;
2550 }
2551 #endif
2552
2553 void module_connection(void)
2554 {
2555     oconv = output_conv;
2556     o_putc = std_putc;
2557
2558     /* replace continucation module, from output side */
2559
2560     /* output redicrection */
2561 #ifdef CHECK_OPTION
2562     if (noout_f || guess_f){
2563         o_putc = no_putc;
2564     }
2565 #endif
2566     if (mimeout_f) {
2567         o_mputc = o_putc;
2568         o_putc = mime_putc;
2569         if (mimeout_f == TRUE) {
2570             o_base64conv = oconv; oconv = base64_conv;
2571         }
2572         /* base64_count = 0; */
2573     }
2574
2575     if (nlmode_f || guess_f) {
2576         o_nlconv = oconv; oconv = nl_conv;
2577     }
2578     if (rot_f) {
2579         o_rot_conv = oconv; oconv = rot_conv;
2580     }
2581     if (iso2022jp_f) {
2582         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2583     }
2584     if (hira_f) {
2585         o_hira_conv = oconv; oconv = hira_conv;
2586     }
2587     if (fold_f) {
2588         o_fconv = oconv; oconv = fold_conv;
2589         f_line = 0;
2590     }
2591     if (alpha_f || x0201_f) {
2592         o_zconv = oconv; oconv = z_conv;
2593     }
2594
2595     i_getc = std_getc;
2596     i_ungetc = std_ungetc;
2597     /* input redicrection */
2598 #ifdef INPUT_OPTION
2599     if (cap_f){
2600         i_cgetc = i_getc; i_getc = cap_getc;
2601         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2602     }
2603     if (url_f){
2604         i_ugetc = i_getc; i_getc = url_getc;
2605         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2606     }
2607 #endif
2608 #ifdef NUMCHAR_OPTION
2609     if (numchar_f){
2610         i_ngetc = i_getc; i_getc = numchar_getc;
2611         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2612     }
2613 #endif
2614 #ifdef UNICODE_NORMALIZATION
2615     if (nfc_f && input_f == UTF8_INPUT){
2616         i_nfc_getc = i_getc; i_getc = nfc_getc;
2617         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2618     }
2619 #endif
2620     if (mime_f && mimebuf_f==FIXED_MIME) {
2621         i_mgetc = i_getc; i_getc = mime_getc;
2622         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2623     }
2624     if (broken_f & 1) {
2625         i_bgetc = i_getc; i_getc = broken_getc;
2626         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2627     }
2628     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2629         set_iconv(-TRUE, e_iconv);
2630     } else if (input_f == SJIS_INPUT) {
2631         set_iconv(-TRUE, s_iconv);
2632 #ifdef UTF8_INPUT_ENABLE
2633     } else if (input_f == UTF8_INPUT) {
2634         set_iconv(-TRUE, w_iconv);
2635     } else if (input_f == UTF16_INPUT) {
2636         set_iconv(-TRUE, w_iconv16);
2637     } else if (input_f == UTF32_INPUT) {
2638         set_iconv(-TRUE, w_iconv32);
2639 #endif
2640     } else {
2641         set_iconv(FALSE, e_iconv);
2642     }
2643
2644     {
2645         struct input_code *p = input_code_list;
2646         while (p->name){
2647             status_reinit(p++);
2648         }
2649     }
2650 }
2651
2652 /*
2653  * Check and Ignore BOM
2654  */
2655 void check_bom(FILE *f)
2656 {
2657     int c2;
2658     switch(c2 = (*i_getc)(f)){
2659     case 0x00:
2660         if((c2 = (*i_getc)(f)) == 0x00){
2661             if((c2 = (*i_getc)(f)) == 0xFE){
2662                 if((c2 = (*i_getc)(f)) == 0xFF){
2663                     if(!input_f){
2664                         set_iconv(TRUE, w_iconv32);
2665                     }
2666                     if (iconv == w_iconv32) {
2667                         input_endian = ENDIAN_BIG;
2668                         return;
2669                     }
2670                     (*i_ungetc)(0xFF,f);
2671                 }else (*i_ungetc)(c2,f);
2672                 (*i_ungetc)(0xFE,f);
2673             }else if(c2 == 0xFF){
2674                 if((c2 = (*i_getc)(f)) == 0xFE){
2675                     if(!input_f){
2676                         set_iconv(TRUE, w_iconv32);
2677                     }
2678                     if (iconv == w_iconv32) {
2679                         input_endian = ENDIAN_2143;
2680                         return;
2681                     }
2682                     (*i_ungetc)(0xFF,f);
2683                 }else (*i_ungetc)(c2,f);
2684                 (*i_ungetc)(0xFF,f);
2685             }else (*i_ungetc)(c2,f);
2686             (*i_ungetc)(0x00,f);
2687         }else (*i_ungetc)(c2,f);
2688         (*i_ungetc)(0x00,f);
2689         break;
2690     case 0xEF:
2691         if((c2 = (*i_getc)(f)) == 0xBB){
2692             if((c2 = (*i_getc)(f)) == 0xBF){
2693                 if(!input_f){
2694                     set_iconv(TRUE, w_iconv);
2695                 }
2696                 if (iconv == w_iconv) {
2697                     return;
2698                 }
2699                 (*i_ungetc)(0xBF,f);
2700             }else (*i_ungetc)(c2,f);
2701             (*i_ungetc)(0xBB,f);
2702         }else (*i_ungetc)(c2,f);
2703         (*i_ungetc)(0xEF,f);
2704         break;
2705     case 0xFE:
2706         if((c2 = (*i_getc)(f)) == 0xFF){
2707             if((c2 = (*i_getc)(f)) == 0x00){
2708                 if((c2 = (*i_getc)(f)) == 0x00){
2709                     if(!input_f){
2710                         set_iconv(TRUE, w_iconv32);
2711                     }
2712                     if (iconv == w_iconv32) {
2713                         input_endian = ENDIAN_3412;
2714                         return;
2715                     }
2716                     (*i_ungetc)(0x00,f);
2717                 }else (*i_ungetc)(c2,f);
2718                 (*i_ungetc)(0x00,f);
2719             }else (*i_ungetc)(c2,f);
2720             if(!input_f){
2721                 set_iconv(TRUE, w_iconv16);
2722             }
2723             if (iconv == w_iconv16) {
2724                 input_endian = ENDIAN_BIG;
2725                 return;
2726             }
2727             (*i_ungetc)(0xFF,f);
2728         }else (*i_ungetc)(c2,f);
2729         (*i_ungetc)(0xFE,f);
2730         break;
2731     case 0xFF:
2732         if((c2 = (*i_getc)(f)) == 0xFE){
2733             if((c2 = (*i_getc)(f)) == 0x00){
2734                 if((c2 = (*i_getc)(f)) == 0x00){
2735                     if(!input_f){
2736                         set_iconv(TRUE, w_iconv32);
2737                     }
2738                     if (iconv == w_iconv32) {
2739                         input_endian = ENDIAN_LITTLE;
2740                         return;
2741                     }
2742                     (*i_ungetc)(0x00,f);
2743                 }else (*i_ungetc)(c2,f);
2744                 (*i_ungetc)(0x00,f);
2745             }else (*i_ungetc)(c2,f);
2746             if(!input_f){
2747                 set_iconv(TRUE, w_iconv16);
2748             }
2749             if (iconv == w_iconv16) {
2750                 input_endian = ENDIAN_LITTLE;
2751                 return;
2752             }
2753             (*i_ungetc)(0xFE,f);
2754         }else (*i_ungetc)(c2,f);
2755         (*i_ungetc)(0xFF,f);
2756         break;
2757     default:
2758         (*i_ungetc)(c2,f);
2759         break;
2760     }
2761 }
2762
2763 /*
2764    Conversion main loop. Code detection only.
2765  */
2766
2767 nkf_char kanji_convert(FILE *f)
2768 {
2769     nkf_char    c3, c2=0, c1, c0=0;
2770     int is_8bit = FALSE;
2771
2772     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2773 #ifdef UTF8_INPUT_ENABLE
2774        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2775 #endif
2776       ){
2777         is_8bit = TRUE;
2778     }
2779
2780     input_mode = ASCII;
2781     output_mode = ASCII;
2782     shift_mode = FALSE;
2783
2784 #define NEXT continue      /* no output, get next */
2785 #define SEND ;             /* output c1 and c2, get next */
2786 #define LAST break         /* end of loop, go closing  */
2787
2788     module_connection();
2789     check_bom(f);
2790
2791     while ((c1 = (*i_getc)(f)) != EOF) {
2792 #ifdef INPUT_CODE_FIX
2793         if (!input_f)
2794 #endif
2795             code_status(c1);
2796         if (c2) {
2797             /* second byte */
2798             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2799                 /* in case of 8th bit is on */
2800                 if (!estab_f&&!mime_decode_mode) {
2801                     /* in case of not established yet */
2802                     /* It is still ambiguious */
2803                     if (h_conv(f, c2, c1)==EOF)
2804                         LAST;
2805                     else
2806                         c2 = 0;
2807                     NEXT;
2808                 } else {
2809                     /* in case of already established */
2810                     if (c1 < AT) {
2811                         /* ignore bogus code and not CP5022x UCD */
2812                         c2 = 0;
2813                         NEXT;
2814                     } else {
2815                         SEND;
2816                     }
2817                 }
2818             } else
2819                 /* second byte, 7 bit code */
2820                 /* it might be kanji shitfted */
2821                 if ((c1 == DEL) || (c1 <= SP)) {
2822                     /* ignore bogus first code */
2823                     c2 = 0;
2824                     NEXT;
2825                 } else
2826                     SEND;
2827         } else {
2828             /* first byte */
2829 #ifdef UTF8_INPUT_ENABLE
2830             if (iconv == w_iconv16) {
2831                 if (input_endian == ENDIAN_BIG) {
2832                     c2 = c1;
2833                     if ((c1 = (*i_getc)(f)) != EOF) {
2834                         if (0xD8 <= c2 && c2 <= 0xDB) {
2835                             if ((c0 = (*i_getc)(f)) != EOF) {
2836                                 c0 <<= 8;
2837                                 if ((c3 = (*i_getc)(f)) != EOF) {
2838                                     c0 |= c3;
2839                                 } else c2 = EOF;
2840                             } else c2 = EOF;
2841                         }
2842                     } else c2 = EOF;
2843                 } else {
2844                     if ((c2 = (*i_getc)(f)) != EOF) {
2845                         if (0xD8 <= c2 && c2 <= 0xDB) {
2846                             if ((c3 = (*i_getc)(f)) != EOF) {
2847                                 if ((c0 = (*i_getc)(f)) != EOF) {
2848                                     c0 <<= 8;
2849                                     c0 |= c3;
2850                                 } else c2 = EOF;
2851                             } else c2 = EOF;
2852                         }
2853                     } else c2 = EOF;
2854                 }
2855                 SEND;
2856             } else if(iconv == w_iconv32){
2857                 int c3 = c1;
2858                 if((c2 = (*i_getc)(f)) != EOF &&
2859                    (c1 = (*i_getc)(f)) != EOF &&
2860                    (c0 = (*i_getc)(f)) != EOF){
2861                     switch(input_endian){
2862                     case ENDIAN_BIG:
2863                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2864                         break;
2865                     case ENDIAN_LITTLE:
2866                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2867                         break;
2868                     case ENDIAN_2143:
2869                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2870                         break;
2871                     case ENDIAN_3412:
2872                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2873                         break;
2874                     }
2875                     c2 = 0;
2876                 }else{
2877                     c2 = EOF;
2878                 }
2879                 SEND;
2880             } else
2881 #endif
2882 #ifdef NUMCHAR_OPTION
2883             if (is_unicode_capsule(c1)){
2884                 SEND;
2885             } else
2886 #endif
2887             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2888                 /* 8 bit code */
2889                 if (!estab_f && !iso8859_f) {
2890                     /* not established yet */
2891                     c2 = c1;
2892                     NEXT;
2893                 } else { /* estab_f==TRUE */
2894                     if (iso8859_f) {
2895                         c2 = ISO_8859_1;
2896                         c1 &= 0x7f;
2897                         SEND;
2898                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2899                         /* SJIS X0201 Case... */
2900                         if (iso2022jp_f && !x0201_f) {
2901                             (*oconv)(GETA1, GETA2);
2902                             NEXT;
2903                         } else {
2904                             c2 = JIS_X_0201;
2905                             c1 &= 0x7f;
2906                             SEND;
2907                         }
2908                     } else if (c1==SSO && iconv != s_iconv) {
2909                         /* EUC X0201 Case */
2910                         c1 = (*i_getc)(f);  /* skip SSO */
2911                         code_status(c1);
2912                         if (SSP<=c1 && c1<0xe0) {
2913                             if (iso2022jp_f && !x0201_f) {
2914                                 (*oconv)(GETA1, GETA2);
2915                                 NEXT;
2916                             } else {
2917                                 c2 = JIS_X_0201;
2918                                 c1 &= 0x7f;
2919                                 SEND;
2920                             }
2921                         } else  { /* bogus code, skip SSO and one byte */
2922                             NEXT;
2923                         }
2924                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2925                                (c1 == 0xFD || c1 == 0xFE)) {
2926                         /* CP10001 */
2927                         c2 = JIS_X_0201;
2928                         c1 &= 0x7f;
2929                         SEND;
2930                     } else {
2931                        /* already established */
2932                        c2 = c1;
2933                        NEXT;
2934                     }
2935                 }
2936             } else if ((c1 > SP) && (c1 != DEL)) {
2937                 /* in case of Roman characters */
2938                 if (shift_mode) {
2939                     /* output 1 shifted byte */
2940                     if (iso8859_f) {
2941                         c2 = ISO_8859_1;
2942                         SEND;
2943                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2944                       /* output 1 shifted byte */
2945                         if (iso2022jp_f && !x0201_f) {
2946                             (*oconv)(GETA1, GETA2);
2947                             NEXT;
2948                         } else {
2949                             c2 = JIS_X_0201;
2950                             SEND;
2951                         }
2952                     } else {
2953                         /* look like bogus code */
2954                         NEXT;
2955                     }
2956                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
2957                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
2958                     /* in case of Kanji shifted */
2959                     c2 = c1;
2960                     NEXT;
2961                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2962                     /* Check MIME code */
2963                     if ((c1 = (*i_getc)(f)) == EOF) {
2964                         (*oconv)(0, '=');
2965                         LAST;
2966                     } else if (c1 == '?') {
2967                         /* =? is mime conversion start sequence */
2968                         if(mime_f == STRICT_MIME) {
2969                             /* check in real detail */
2970                             if (mime_begin_strict(f) == EOF)
2971                                 LAST;
2972                             else
2973                                 NEXT;
2974                         } else if (mime_begin(f) == EOF)
2975                             LAST;
2976                         else
2977                             NEXT;
2978                     } else {
2979                         (*oconv)(0, '=');
2980                         (*i_ungetc)(c1,f);
2981                         NEXT;
2982                     }
2983                 } else {
2984                     /* normal ASCII code */
2985                     SEND;
2986                 }
2987             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2988                 shift_mode = FALSE;
2989                 NEXT;
2990             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2991                 shift_mode = TRUE;
2992                 NEXT;
2993             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2994                 if ((c1 = (*i_getc)(f)) == EOF) {
2995                     /*  (*oconv)(0, ESC); don't send bogus code */
2996                     LAST;
2997                 } else if (c1 == '$') {
2998                     if ((c1 = (*i_getc)(f)) == EOF) {
2999                         /*
3000                         (*oconv)(0, ESC); don't send bogus code
3001                         (*oconv)(0, '$'); */
3002                         LAST;
3003                     } else if (c1 == '@'|| c1 == 'B') {
3004                         /* This is kanji introduction */
3005                         input_mode = JIS_X_0208;
3006                         shift_mode = FALSE;
3007                         set_input_codename("ISO-2022-JP");
3008 #ifdef CHECK_OPTION
3009                         debug("ISO-2022-JP");
3010 #endif
3011                         NEXT;
3012                     } else if (c1 == '(') {
3013                         if ((c1 = (*i_getc)(f)) == EOF) {
3014                             /* don't send bogus code
3015                             (*oconv)(0, ESC);
3016                             (*oconv)(0, '$');
3017                             (*oconv)(0, '(');
3018                                 */
3019                             LAST;
3020                         } else if (c1 == '@'|| c1 == 'B') {
3021                             /* This is kanji introduction */
3022                             input_mode = JIS_X_0208;
3023                             shift_mode = FALSE;
3024                             NEXT;
3025 #ifdef X0212_ENABLE
3026                         } else if (c1 == 'D'){
3027                             input_mode = JIS_X_0212;
3028                             shift_mode = FALSE;
3029                             NEXT;
3030 #endif /* X0212_ENABLE */
3031                         } else if (c1 == (JIS_X_0213_1&0x7F)){
3032                             input_mode = JIS_X_0213_1;
3033                             shift_mode = FALSE;
3034                             NEXT;
3035                         } else if (c1 == (JIS_X_0213_2&0x7F)){
3036                             input_mode = JIS_X_0213_2;
3037                             shift_mode = FALSE;
3038                             NEXT;
3039                         } else {
3040                             /* could be some special code */
3041                             (*oconv)(0, ESC);
3042                             (*oconv)(0, '$');
3043                             (*oconv)(0, '(');
3044                             (*oconv)(0, c1);
3045                             NEXT;
3046                         }
3047                     } else if (broken_f&0x2) {
3048                         /* accept any ESC-(-x as broken code ... */
3049                         input_mode = JIS_X_0208;
3050                         shift_mode = FALSE;
3051                         NEXT;
3052                     } else {
3053                         (*oconv)(0, ESC);
3054                         (*oconv)(0, '$');
3055                         (*oconv)(0, c1);
3056                         NEXT;
3057                     }
3058                 } else if (c1 == '(') {
3059                     if ((c1 = (*i_getc)(f)) == EOF) {
3060                         /* don't send bogus code
3061                         (*oconv)(0, ESC);
3062                         (*oconv)(0, '('); */
3063                         LAST;
3064                     } else {
3065                         if (c1 == 'I') {
3066                             /* This is X0201 kana introduction */
3067                             input_mode = JIS_X_0201; shift_mode = JIS_X_0201;
3068                             NEXT;
3069                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
3070                             /* This is X0208 kanji introduction */
3071                             input_mode = ASCII; shift_mode = FALSE;
3072                             NEXT;
3073                         } else if (broken_f&0x2) {
3074                             input_mode = ASCII; shift_mode = FALSE;
3075                             NEXT;
3076                         } else {
3077                             (*oconv)(0, ESC);
3078                             (*oconv)(0, '(');
3079                             /* maintain various input_mode here */
3080                             SEND;
3081                         }
3082                     }
3083                } else if ( c1 == 'N' || c1 == 'n'){
3084                    /* SS2 */
3085                    c3 = (*i_getc)(f);  /* skip SS2 */
3086                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
3087                        c1 = c3;
3088                        c2 = JIS_X_0201;
3089                        SEND;
3090                    }else{
3091                        (*i_ungetc)(c3, f);
3092                        /* lonely ESC  */
3093                        (*oconv)(0, ESC);
3094                        SEND;
3095                    }
3096                 } else {
3097                     /* lonely ESC  */
3098                     (*oconv)(0, ESC);
3099                     SEND;
3100                 }
3101             } else if (c1 == ESC && iconv == s_iconv) {
3102                 /* ESC in Shift_JIS */
3103                 if ((c1 = (*i_getc)(f)) == EOF) {
3104                     /*  (*oconv)(0, ESC); don't send bogus code */
3105                     LAST;
3106                 } else if (c1 == '$') {
3107                     /* J-PHONE emoji */
3108                     if ((c1 = (*i_getc)(f)) == EOF) {
3109                         /*
3110                            (*oconv)(0, ESC); don't send bogus code
3111                            (*oconv)(0, '$'); */
3112                         LAST;
3113                     } else {
3114                         if (('E' <= c1 && c1 <= 'G') ||
3115                             ('O' <= c1 && c1 <= 'Q')) {
3116                             /*
3117                                NUM : 0 1 2 3 4 5
3118                                BYTE: G E F O P Q
3119                                C%7 : 1 6 0 2 3 4
3120                                C%7 : 0 1 2 3 4 5 6
3121                                NUM : 2 0 3 4 5 X 1
3122                              */
3123                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
3124                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
3125                             while ((c1 = (*i_getc)(f)) != EOF) {
3126                                 if (SP <= c1 && c1 <= 'z') {
3127                                     (*oconv)(0, c1 + c0);
3128                                 } else break; /* c1 == SO */
3129                             }
3130                         }
3131                     }
3132                     if (c1 == EOF) LAST;
3133                     NEXT;
3134                 } else {
3135                     /* lonely ESC  */
3136                     (*oconv)(0, ESC);
3137                     SEND;
3138                 }
3139             } else if (c1 == LF || c1 == CR) {
3140                 if (broken_f&4) {
3141                     input_mode = ASCII; set_iconv(FALSE, 0);
3142                     SEND;
3143                 } else if (mime_decode_f && !mime_decode_mode){
3144                     if (c1 == LF) {
3145                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
3146                             i_ungetc(SP,f);
3147                             continue;
3148                         } else {
3149                             i_ungetc(c1,f);
3150                         }
3151                         c1 = LF;
3152                         SEND;
3153                     } else  { /* if (c1 == CR)*/
3154                         if ((c1=(*i_getc)(f))!=EOF) {
3155                             if (c1==SP) {
3156                                 i_ungetc(SP,f);
3157                                 continue;
3158                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
3159                                 i_ungetc(SP,f);
3160                                 continue;
3161                             } else {
3162                                 i_ungetc(c1,f);
3163                             }
3164                             i_ungetc(LF,f);
3165                         } else {
3166                             i_ungetc(c1,f);
3167                         }
3168                         c1 = CR;
3169                         SEND;
3170                     }
3171                 }
3172             } else if (c1 == DEL && input_mode == JIS_X_0208) {
3173                 /* CP5022x */
3174                 c2 = c1;
3175                 NEXT;
3176             } else
3177                 SEND;
3178         }
3179         /* send: */
3180         switch(input_mode){
3181         case ASCII:
3182             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
3183             case -2:
3184                 /* 4 bytes UTF-8 */
3185                 if ((c0 = (*i_getc)(f)) != EOF) {
3186                     code_status(c0);
3187                     c0 <<= 8;
3188                     if ((c3 = (*i_getc)(f)) != EOF) {
3189                         code_status(c3);
3190                         (*iconv)(c2, c1, c0|c3);
3191                     }
3192                 }
3193                 break;
3194             case -1:
3195                 /* 3 bytes EUC or UTF-8 */
3196                 if ((c0 = (*i_getc)(f)) != EOF) {
3197                     code_status(c0);
3198                     (*iconv)(c2, c1, c0);
3199                 }
3200                 break;
3201             }
3202             break;
3203         case JIS_X_0208:
3204         case JIS_X_0213_1:
3205             if (ms_ucs_map_f &&
3206                 0x7F <= c2 && c2 <= 0x92 &&
3207                 0x21 <= c1 && c1 <= 0x7E) {
3208                 /* CP932 UDC */
3209                 if(c1 == 0x7F) return 0;
3210                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
3211                 c2 = 0;
3212             }
3213             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
3214             break;
3215 #ifdef X0212_ENABLE
3216         case JIS_X_0212:
3217             (*oconv)(PREFIX_EUCG3 | c2, c1);
3218             break;
3219 #endif /* X0212_ENABLE */
3220         case JIS_X_0213_2:
3221             (*oconv)(PREFIX_EUCG3 | c2, c1);
3222             break;
3223         default:
3224             (*oconv)(input_mode, c1);  /* other special case */
3225         }
3226
3227         c2 = 0;
3228         c0 = 0;
3229         continue;
3230         /* goto next_word */
3231     }
3232
3233     /* epilogue */
3234     (*iconv)(EOF, 0, 0);
3235     if (!input_codename)
3236     {
3237         if (is_8bit) {
3238             struct input_code *p = input_code_list;
3239             struct input_code *result = p;
3240             while (p->name){
3241                 if (p->score < result->score) result = p;
3242                 ++p;
3243             }
3244             set_input_codename(result->name);
3245 #ifdef CHECK_OPTION
3246             debug(result->name);
3247 #endif
3248         }
3249     }
3250     return 1;
3251 }
3252
3253 nkf_char
3254 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3255 {
3256     nkf_char ret, c3, c0;
3257     int hold_index;
3258
3259
3260     /** it must NOT be in the kanji shifte sequence      */
3261     /** it must NOT be written in JIS7                   */
3262     /** and it must be after 2 byte 8bit code            */
3263
3264     hold_count = 0;
3265     push_hold_buf(c2);
3266     push_hold_buf(c1);
3267
3268     while ((c1 = (*i_getc)(f)) != EOF) {
3269         if (c1 == ESC){
3270             (*i_ungetc)(c1,f);
3271             break;
3272         }
3273         code_status(c1);
3274         if (push_hold_buf(c1) == EOF || estab_f){
3275             break;
3276         }
3277     }
3278
3279     if (!estab_f){
3280         struct input_code *p = input_code_list;
3281         struct input_code *result = p;
3282         if (c1 == EOF){
3283             code_status(c1);
3284         }
3285         while (p->name){
3286             if (p->status_func && p->score < result->score){
3287                 result = p;
3288             }
3289             ++p;
3290         }
3291         set_iconv(TRUE, result->iconv_func);
3292     }
3293
3294
3295     /** now,
3296      ** 1) EOF is detected, or
3297      ** 2) Code is established, or
3298      ** 3) Buffer is FULL (but last word is pushed)
3299      **
3300      ** in 1) and 3) cases, we continue to use
3301      ** Kanji codes by oconv and leave estab_f unchanged.
3302      **/
3303
3304     ret = c1;
3305     hold_index = 0;
3306     while (hold_index < hold_count){
3307         c2 = hold_buf[hold_index++];
3308         if (c2 <= DEL
3309 #ifdef NUMCHAR_OPTION
3310             || is_unicode_capsule(c2)
3311 #endif
3312             ){
3313             (*iconv)(0, c2, 0);
3314             continue;
3315         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3316             (*iconv)(JIS_X_0201, c2, 0);
3317             continue;
3318         }
3319         if (hold_index < hold_count){
3320             c1 = hold_buf[hold_index++];
3321         }else{
3322             c1 = (*i_getc)(f);
3323             if (c1 == EOF){
3324                 c3 = EOF;
3325                 break;
3326             }
3327             code_status(c1);
3328         }
3329         c0 = 0;
3330         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3331         case -2:
3332             /* 4 bytes UTF-8 */
3333             if (hold_index < hold_count){
3334                 c0 = hold_buf[hold_index++];
3335             } else if ((c0 = (*i_getc)(f)) == EOF) {
3336                 ret = EOF;
3337                 break;
3338             } else {
3339                 code_status(c0);
3340                 c0 <<= 8;
3341                 if (hold_index < hold_count){
3342                     c3 = hold_buf[hold_index++];
3343                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3344                     c0 = ret = EOF;
3345                     break;
3346                 } else {
3347                     code_status(c3);
3348                     (*iconv)(c2, c1, c0|c3);
3349                 }
3350             }
3351             break;
3352         case -1:
3353             /* 3 bytes EUC or UTF-8 */
3354             if (hold_index < hold_count){
3355                 c0 = hold_buf[hold_index++];
3356             } else if ((c0 = (*i_getc)(f)) == EOF) {
3357                 ret = EOF;
3358                 break;
3359             } else {
3360                 code_status(c0);
3361             }
3362             (*iconv)(c2, c1, c0);
3363             break;
3364         }
3365         if (c0 == EOF) break;
3366     }
3367     return ret;
3368 }
3369
3370 nkf_char push_hold_buf(nkf_char c2)
3371 {
3372     if (hold_count >= HOLD_SIZE*2)
3373         return (EOF);
3374     hold_buf[hold_count++] = (unsigned char)c2;
3375     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3376 }
3377
3378 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3379 {
3380 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3381     nkf_char val;
3382 #endif
3383     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3384 #ifdef SHIFTJIS_CP932
3385     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3386         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3387         if (val){
3388             c2 = val >> 8;
3389             c1 = val & 0xff;
3390         }
3391     }
3392     if (cp932inv_f
3393         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3394         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3395         if (c){
3396             c2 = c >> 8;
3397             c1 = c & 0xff;
3398         }
3399     }
3400 #endif /* SHIFTJIS_CP932 */
3401 #ifdef X0212_ENABLE
3402     if (!x0213_f && is_ibmext_in_sjis(c2)){
3403         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3404         if (val){
3405             if (val > 0x7FFF){
3406                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3407                 c1 = val & 0xff;
3408             }else{
3409                 c2 = val >> 8;
3410                 c1 = val & 0xff;
3411             }
3412             if (p2) *p2 = c2;
3413             if (p1) *p1 = c1;
3414             return 0;
3415         }
3416     }
3417 #endif
3418     if(c2 >= 0x80){
3419         if(x0213_f && c2 >= 0xF0){
3420             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3421                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3422             }else{ /* 78<=k<=94 */
3423                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3424                 if (0x9E < c1) c2++;
3425             }
3426         }else{
3427             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3428             if (0x9E < c1) c2++;
3429         }
3430         if (c1 < 0x9F)
3431             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3432         else {
3433             c1 = c1 - 0x7E;
3434         }
3435     }
3436
3437 #ifdef X0212_ENABLE
3438     c2 = x0212_unshift(c2);
3439 #endif
3440     if (p2) *p2 = c2;
3441     if (p1) *p1 = c1;
3442     return 0;
3443 }
3444
3445 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3446 {
3447     if (c2 == JIS_X_0201) {
3448         c1 &= 0x7f;
3449     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3450         /* NOP */
3451     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3452         /* CP932 UDC */
3453         if(c1 == 0x7F) return 0;
3454         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3455         c2 = 0;
3456     } else {
3457         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3458         if (ret) return ret;
3459     }
3460     (*oconv)(c2, c1);
3461     return 0;
3462 }
3463
3464 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3465 {
3466     if (c2 == JIS_X_0201) {
3467         c1 &= 0x7f;
3468 #ifdef X0212_ENABLE
3469     }else if (c2 == 0x8f){
3470         if (c0 == 0){
3471             return -1;
3472         }
3473         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3474             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3475             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3476             c2 = 0;
3477         } else {
3478             c2 = (c2 << 8) | (c1 & 0x7f);
3479             c1 = c0 & 0x7f;
3480 #ifdef SHIFTJIS_CP932
3481             if (cp51932_f){
3482                 nkf_char s2, s1;
3483                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3484                     s2e_conv(s2, s1, &c2, &c1);
3485                     if (c2 < 0x100){
3486                         c1 &= 0x7f;
3487                         c2 &= 0x7f;
3488                     }
3489                 }
3490             }
3491 #endif /* SHIFTJIS_CP932 */
3492         }
3493 #endif /* X0212_ENABLE */
3494     } else if (c2 == SSO){
3495         c2 = JIS_X_0201;
3496         c1 &= 0x7f;
3497     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3498         /* NOP */
3499     } else {
3500         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3501             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3502             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3503             c2 = 0;
3504         } else {
3505             c1 &= 0x7f;
3506             c2 &= 0x7f;
3507 #ifdef SHIFTJIS_CP932
3508             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3509                 nkf_char s2, s1;
3510                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3511                     s2e_conv(s2, s1, &c2, &c1);
3512                     if (c2 < 0x100){
3513                         c1 &= 0x7f;
3514                         c2 &= 0x7f;
3515                     }
3516                 }
3517             }
3518 #endif /* SHIFTJIS_CP932 */
3519         }
3520     }
3521     (*oconv)(c2, c1);
3522     return 0;
3523 }
3524
3525 #ifdef UTF8_INPUT_ENABLE
3526 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3527 {
3528     nkf_char ret = 0;
3529
3530     if (!c1){
3531         *p2 = 0;
3532         *p1 = c2;
3533     }else if (0xc0 <= c2 && c2 <= 0xef) {
3534         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3535 #ifdef NUMCHAR_OPTION
3536         if (ret > 0){
3537             if (p2) *p2 = 0;
3538             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3539             ret = 0;
3540         }
3541 #endif
3542     }
3543     return ret;
3544 }
3545
3546 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3547 {
3548     nkf_char ret = 0;
3549     static const char w_iconv_utf8_1st_byte[] =
3550     { /* 0xC0 - 0xFF */
3551         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3552         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3553         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3554         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3555
3556     if (c2 < 0 || 0xff < c2) {
3557     }else if (c2 == 0) { /* 0 : 1 byte*/
3558         c0 = 0;
3559     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3560         return 0;
3561     } else{
3562         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3563         case 21:
3564             if (c1 < 0x80 || 0xBF < c1) return 0;
3565             break;
3566         case 30:
3567             if (c0 == 0) return -1;
3568             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3569                 return 0;
3570             break;
3571         case 31:
3572         case 33:
3573             if (c0 == 0) return -1;
3574             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3575                 return 0;
3576             break;
3577         case 32:
3578             if (c0 == 0) return -1;
3579             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3580                 return 0;
3581             break;
3582         case 40:
3583             if (c0 == 0) return -2;
3584             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3585                 return 0;
3586             break;
3587         case 41:
3588             if (c0 == 0) return -2;
3589             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3590                 return 0;
3591             break;
3592         case 42:
3593             if (c0 == 0) return -2;
3594             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3595                 return 0;
3596             break;
3597         default:
3598             return 0;
3599             break;
3600         }
3601     }
3602     if (c2 == 0 || c2 == EOF){
3603     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3604         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3605         c2 = 0;
3606     } else {
3607         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3608     }
3609     if (ret == 0){
3610         (*oconv)(c2, c1);
3611     }
3612     return ret;
3613 }
3614 #endif
3615
3616 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3617 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3618 {
3619     val &= VALUE_MASK;
3620     if (val < 0x80){
3621         *p2 = val;
3622         *p1 = 0;
3623         *p0 = 0;
3624     }else if (val < 0x800){
3625         *p2 = 0xc0 | (val >> 6);
3626         *p1 = 0x80 | (val & 0x3f);
3627         *p0 = 0;
3628     } else if (val <= NKF_INT32_C(0xFFFF)) {
3629         *p2 = 0xe0 | (val >> 12);
3630         *p1 = 0x80 | ((val >> 6) & 0x3f);
3631         *p0 = 0x80 | (val        & 0x3f);
3632     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3633         *p2 = 0xe0 |  (val >> 16);
3634         *p1 = 0x80 | ((val >> 12) & 0x3f);
3635         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3636     } else {
3637         *p2 = 0;
3638         *p1 = 0;
3639         *p0 = 0;
3640     }
3641 }
3642 #endif
3643
3644 #ifdef UTF8_INPUT_ENABLE
3645 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3646 {
3647     nkf_char val;
3648     if (c2 >= 0xf8) {
3649         val = -1;
3650     } else if (c2 >= 0xf0){
3651         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3652         val = (c2 & 0x0f) << 18;
3653         val |= (c1 & 0x3f) << 12;
3654         val |= (c0 & 0x3f00) >> 2;
3655         val |= (c0 & 0x3f);
3656     }else if (c2 >= 0xe0){
3657         val = (c2 & 0x0f) << 12;
3658         val |= (c1 & 0x3f) << 6;
3659         val |= (c0 & 0x3f);
3660     }else if (c2 >= 0xc0){
3661         val = (c2 & 0x1f) << 6;
3662         val |= (c1 & 0x3f);
3663     }else{
3664         val = c2;
3665     }
3666     return val;
3667 }
3668
3669 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3670 {
3671     nkf_char c2, c1, c0;
3672     nkf_char ret = 0;
3673     val &= VALUE_MASK;
3674     if (val < 0x80){
3675         *p2 = 0;
3676         *p1 = val;
3677     }else{
3678         w16w_conv(val, &c2, &c1, &c0);
3679         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3680 #ifdef NUMCHAR_OPTION
3681         if (ret > 0){
3682             *p2 = 0;
3683             *p1 = CLASS_UNICODE | val;
3684             ret = 0;
3685         }
3686 #endif
3687     }
3688     return ret;
3689 }
3690 #endif
3691
3692 #ifdef UTF8_INPUT_ENABLE
3693 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3694 {
3695     nkf_char ret = 0;
3696     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3697         (*oconv)(c2, c1);
3698         return 0;
3699     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3700         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3701             return -2;
3702         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3703         c2 = 0;
3704     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3705         /*
3706            return 2;
3707         */
3708         return 1;
3709     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3710     if (ret) return ret;
3711     (*oconv)(c2, c1);
3712     return 0;
3713 }
3714
3715 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3716 {
3717     int ret = 0;
3718
3719     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3720     } else if (is_unicode_bmp(c1)) {
3721         ret = w16e_conv(c1, &c2, &c1);
3722     } else {
3723         c2 = 0;
3724         c1 =  CLASS_UNICODE | c1;
3725     }
3726     if (ret) return ret;
3727     (*oconv)(c2, c1);
3728     return 0;
3729 }
3730
3731 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3732 {
3733     const unsigned short *const *pp;
3734     const unsigned short *const *const *ppp;
3735     static const char no_best_fit_chars_table_C2[] =
3736     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3737         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3738         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3739         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3740     static const char no_best_fit_chars_table_C2_ms[] =
3741     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3742         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3743         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3744         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3745     static const char no_best_fit_chars_table_932_C2[] =
3746     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3747         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3748         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3749         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3750     static const char no_best_fit_chars_table_932_C3[] =
3751     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3752         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3753         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3754         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3755     nkf_char ret = 0;
3756
3757     if(c2 < 0x80){
3758         *p2 = 0;
3759         *p1 = c2;
3760     }else if(c2 < 0xe0){
3761         if(no_best_fit_chars_f){
3762             if(ms_ucs_map_f == UCS_MAP_CP932){
3763                 switch(c2){
3764                 case 0xC2:
3765                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3766                     break;
3767                 case 0xC3:
3768                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3769                     break;
3770                 }
3771             }else if(!cp932inv_f){
3772                 switch(c2){
3773                 case 0xC2:
3774                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3775                     break;
3776                 case 0xC3:
3777                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3778                     break;
3779                 }
3780             }else if(ms_ucs_map_f == UCS_MAP_MS){
3781                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3782             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3783                 switch(c2){
3784                 case 0xC2:
3785                     switch(c1){
3786                     case 0xA2:
3787                     case 0xA3:
3788                     case 0xA5:
3789                     case 0xA6:
3790                     case 0xAC:
3791                     case 0xAF:
3792                     case 0xB8:
3793                         return 1;
3794                     }
3795                     break;
3796                 }
3797             }
3798         }
3799         pp =
3800             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3801             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3802             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3803             utf8_to_euc_2bytes;
3804         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3805     }else if(c0 < 0xF0){
3806         if(no_best_fit_chars_f){
3807             if(ms_ucs_map_f == UCS_MAP_CP932){
3808                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3809             }else if(ms_ucs_map_f == UCS_MAP_MS){
3810                 switch(c2){
3811                 case 0xE2:
3812                     switch(c1){
3813                     case 0x80:
3814                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3815                         break;
3816                     case 0x88:
3817                         if(c0 == 0x92) return 1;
3818                         break;
3819                     }
3820                     break;
3821                 case 0xE3:
3822                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3823                     break;
3824                 }
3825             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3826                 switch(c2){
3827                 case 0xE3:
3828                     switch(c1){
3829                     case 0x82:
3830                             if(c0 == 0x94) return 1;
3831                         break;
3832                     case 0x83:
3833                             if(c0 == 0xBB) return 1;
3834                         break;
3835                     }
3836                     break;
3837                 }
3838             }else{
3839                 switch(c2){
3840                 case 0xE2:
3841                     switch(c1){
3842                     case 0x80:
3843                         if(c0 == 0x95) return 1;
3844                         break;
3845                     case 0x88:
3846                         if(c0 == 0xA5) return 1;
3847                         break;
3848                     }
3849                     break;
3850                 case 0xEF:
3851                     switch(c1){
3852                     case 0xBC:
3853                         if(c0 == 0x8D) return 1;
3854                         break;
3855                     case 0xBD:
3856                         if(c0 == 0x9E && !cp932inv_f) return 1;
3857                         break;
3858                     case 0xBF:
3859                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3860                         break;
3861                     }
3862                     break;
3863                 }
3864             }
3865         }
3866         ppp =
3867             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3868             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3869             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3870             utf8_to_euc_3bytes;
3871         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3872     }else return -1;
3873 #ifdef SHIFTJIS_CP932
3874     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3875         nkf_char s2, s1;
3876         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3877             s2e_conv(s2, s1, p2, p1);
3878         }else{
3879             ret = 1;
3880         }
3881     }
3882 #endif
3883     return ret;
3884 }
3885
3886 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3887 {
3888     nkf_char c2;
3889     const unsigned short *p;
3890     unsigned short val;
3891
3892     if (pp == 0) return 1;
3893
3894     c1 -= 0x80;
3895     if (c1 < 0 || psize <= c1) return 1;
3896     p = pp[c1];
3897     if (p == 0)  return 1;
3898
3899     c0 -= 0x80;
3900     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3901     val = p[c0];
3902     if (val == 0) return 1;
3903     if (no_cp932ext_f && (
3904         (val>>8) == 0x2D || /* NEC special characters */
3905         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3906         )) return 1;
3907
3908     c2 = val >> 8;
3909    if (val > 0x7FFF){
3910         c2 &= 0x7f;
3911         c2 |= PREFIX_EUCG3;
3912     }
3913     if (c2 == SO) c2 = JIS_X_0201;
3914     c1 = val & 0x7f;
3915     if (p2) *p2 = c2;
3916     if (p1) *p1 = c1;
3917     return 0;
3918 }
3919
3920 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3921 {
3922     int shift = 20;
3923     c &= VALUE_MASK;
3924     while(shift >= 0){
3925         if(c >= 1<<shift){
3926             while(shift >= 0){
3927                 (*f)(0, bin2hex(c>>shift));
3928                 shift -= 4;
3929             }
3930         }else{
3931             shift -= 4;
3932         }
3933     }
3934     return;
3935 }
3936
3937 void encode_fallback_html(nkf_char c)
3938 {
3939     (*oconv)(0, '&');
3940     (*oconv)(0, '#');
3941     c &= VALUE_MASK;
3942     if(c >= NKF_INT32_C(1000000))
3943         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3944     if(c >= NKF_INT32_C(100000))
3945         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3946     if(c >= 10000)
3947         (*oconv)(0, 0x30+(c/10000  )%10);
3948     if(c >= 1000)
3949         (*oconv)(0, 0x30+(c/1000   )%10);
3950     if(c >= 100)
3951         (*oconv)(0, 0x30+(c/100    )%10);
3952     if(c >= 10)
3953         (*oconv)(0, 0x30+(c/10     )%10);
3954     if(c >= 0)
3955         (*oconv)(0, 0x30+ c         %10);
3956     (*oconv)(0, ';');
3957     return;
3958 }
3959
3960 void encode_fallback_xml(nkf_char c)
3961 {
3962     (*oconv)(0, '&');
3963     (*oconv)(0, '#');
3964     (*oconv)(0, 'x');
3965     nkf_each_char_to_hex(oconv, c);
3966     (*oconv)(0, ';');
3967     return;
3968 }
3969
3970 void encode_fallback_java(nkf_char c)
3971 {
3972     (*oconv)(0, '\\');
3973     c &= VALUE_MASK;
3974     if(!is_unicode_bmp(c)){
3975         (*oconv)(0, 'U');
3976         (*oconv)(0, '0');
3977         (*oconv)(0, '0');
3978         (*oconv)(0, bin2hex(c>>20));
3979         (*oconv)(0, bin2hex(c>>16));
3980     }else{
3981         (*oconv)(0, 'u');
3982     }
3983     (*oconv)(0, bin2hex(c>>12));
3984     (*oconv)(0, bin2hex(c>> 8));
3985     (*oconv)(0, bin2hex(c>> 4));
3986     (*oconv)(0, bin2hex(c    ));
3987     return;
3988 }
3989
3990 void encode_fallback_perl(nkf_char c)
3991 {
3992     (*oconv)(0, '\\');
3993     (*oconv)(0, 'x');
3994     (*oconv)(0, '{');
3995     nkf_each_char_to_hex(oconv, c);
3996     (*oconv)(0, '}');
3997     return;
3998 }
3999
4000 void encode_fallback_subchar(nkf_char c)
4001 {
4002     c = unicode_subchar;
4003     (*oconv)((c>>8)&0xFF, c&0xFF);
4004     return;
4005 }
4006 #endif
4007
4008 #ifdef UTF8_OUTPUT_ENABLE
4009 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
4010 {
4011     const unsigned short *p;
4012
4013     if (c2 == JIS_X_0201) {
4014         if (ms_ucs_map_f == UCS_MAP_CP10001) {
4015             switch (c1) {
4016             case 0x20:
4017                 return 0xA0;
4018             case 0x7D:
4019                 return 0xA9;
4020             }
4021         }
4022         p = euc_to_utf8_1byte;
4023 #ifdef X0212_ENABLE
4024     } else if (is_eucg3(c2)){
4025         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
4026             return 0xA6;
4027         }
4028         c2 = (c2&0x7f) - 0x21;
4029         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4030             p = x0212_to_utf8_2bytes[c2];
4031         else
4032             return 0;
4033 #endif
4034     } else {
4035         c2 &= 0x7f;
4036         c2 = (c2&0x7f) - 0x21;
4037         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
4038             p =
4039                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
4040                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
4041                 euc_to_utf8_2bytes_ms[c2];
4042         else
4043             return 0;
4044     }
4045     if (!p) return 0;
4046     c1 = (c1 & 0x7f) - 0x21;
4047     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
4048         return p[c1];
4049     return 0;
4050 }
4051
4052 void w_oconv(nkf_char c2, nkf_char c1)
4053 {
4054     nkf_char c0;
4055     nkf_char val;
4056
4057     if (output_bom_f) {
4058         output_bom_f = FALSE;
4059         (*o_putc)('\357');
4060         (*o_putc)('\273');
4061         (*o_putc)('\277');
4062     }
4063
4064     if (c2 == EOF) {
4065         (*o_putc)(EOF);
4066         return;
4067     }
4068
4069 #ifdef NUMCHAR_OPTION
4070     if (c2 == 0 && is_unicode_capsule(c1)){
4071         val = c1 & VALUE_MASK;
4072         if (val < 0x80){
4073             (*o_putc)(val);
4074         }else if (val < 0x800){
4075             (*o_putc)(0xC0 | (val >> 6));
4076             (*o_putc)(0x80 | (val & 0x3f));
4077         } else if (val <= NKF_INT32_C(0xFFFF)) {
4078             (*o_putc)(0xE0 | (val >> 12));
4079             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
4080             (*o_putc)(0x80 | (val        & 0x3f));
4081         } else if (val <= NKF_INT32_C(0x10FFFF)) {
4082             (*o_putc)(0xF0 | ( val>>18));
4083             (*o_putc)(0x80 | ((val>>12) & 0x3f));
4084             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
4085             (*o_putc)(0x80 | ( val      & 0x3f));
4086         }
4087         return;
4088     }
4089 #endif
4090
4091     if (c2 == 0) {
4092         output_mode = ASCII;
4093         (*o_putc)(c1);
4094     } else if (c2 == ISO_8859_1) {
4095         output_mode = UTF_8;
4096         (*o_putc)(c1 | 0x080);
4097     } else {
4098         output_mode = UTF_8;
4099         val = e2w_conv(c2, c1);
4100         if (val){
4101             w16w_conv(val, &c2, &c1, &c0);
4102             (*o_putc)(c2);
4103             if (c1){
4104                 (*o_putc)(c1);
4105                 if (c0) (*o_putc)(c0);
4106             }
4107         }
4108     }
4109 }
4110
4111 void w_oconv16(nkf_char c2, nkf_char c1)
4112 {
4113     if (output_bom_f) {
4114         output_bom_f = FALSE;
4115         if (output_endian == ENDIAN_LITTLE){
4116             (*o_putc)((unsigned char)'\377');
4117             (*o_putc)('\376');
4118         }else{
4119             (*o_putc)('\376');
4120             (*o_putc)((unsigned char)'\377');
4121         }
4122     }
4123
4124     if (c2 == EOF) {
4125         (*o_putc)(EOF);
4126         return;
4127     }
4128
4129     if (c2 == ISO_8859_1) {
4130         c2 = 0;
4131         c1 |= 0x80;
4132 #ifdef NUMCHAR_OPTION
4133     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4134         if (is_unicode_bmp(c1)) {
4135             c2 = (c1 >> 8) & 0xff;
4136             c1 &= 0xff;
4137         } else {
4138             c1 &= VALUE_MASK;
4139             if (c1 <= UNICODE_MAX) {
4140                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
4141                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
4142                 if (output_endian == ENDIAN_LITTLE){
4143                     (*o_putc)(c2 & 0xff);
4144                     (*o_putc)((c2 >> 8) & 0xff);
4145                     (*o_putc)(c1 & 0xff);
4146                     (*o_putc)((c1 >> 8) & 0xff);
4147                 }else{
4148                     (*o_putc)((c2 >> 8) & 0xff);
4149                     (*o_putc)(c2 & 0xff);
4150                     (*o_putc)((c1 >> 8) & 0xff);
4151                     (*o_putc)(c1 & 0xff);
4152                 }
4153             }
4154             return;
4155         }
4156 #endif
4157     } else if (c2) {
4158         nkf_char val = e2w_conv(c2, c1);
4159         c2 = (val >> 8) & 0xff;
4160         c1 = val & 0xff;
4161         if (!val) return;
4162     }
4163     if (output_endian == ENDIAN_LITTLE){
4164         (*o_putc)(c1);
4165         (*o_putc)(c2);
4166     }else{
4167         (*o_putc)(c2);
4168         (*o_putc)(c1);
4169     }
4170 }
4171
4172 void w_oconv32(nkf_char c2, nkf_char c1)
4173 {
4174     if (output_bom_f) {
4175         output_bom_f = FALSE;
4176         if (output_endian == ENDIAN_LITTLE){
4177             (*o_putc)((unsigned char)'\377');
4178             (*o_putc)('\376');
4179             (*o_putc)('\000');
4180             (*o_putc)('\000');
4181         }else{
4182             (*o_putc)('\000');
4183             (*o_putc)('\000');
4184             (*o_putc)('\376');
4185             (*o_putc)((unsigned char)'\377');
4186         }
4187     }
4188
4189     if (c2 == EOF) {
4190         (*o_putc)(EOF);
4191         return;
4192     }
4193
4194     if (c2 == ISO_8859_1) {
4195         c1 |= 0x80;
4196 #ifdef NUMCHAR_OPTION
4197     } else if (c2 == 0 && is_unicode_capsule(c1)) {
4198         c1 &= VALUE_MASK;
4199 #endif
4200     } else if (c2) {
4201         c1 = e2w_conv(c2, c1);
4202         if (!c1) return;
4203     }
4204     if (output_endian == ENDIAN_LITTLE){
4205         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4206         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4207         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4208         (*o_putc)('\000');
4209     }else{
4210         (*o_putc)('\000');
4211         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
4212         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
4213         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
4214     }
4215 }
4216 #endif
4217
4218 void e_oconv(nkf_char c2, nkf_char c1)
4219 {
4220 #ifdef NUMCHAR_OPTION