OSDN Git Service

* Ignore options when they were given with guess option.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * nkf is currently maintained on SourceForge.
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.142 2007/10/05 10:57:50 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-10-05"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
82 void  djgpp_setbinmode(FILE *fp)
83 {
84     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
88     __file_handle_set(fd, m);
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
107 #ifdef OVERWRITE
108 /* added by satoru@isoternet.org */
109 #if defined(__EMX__)
110 #include <sys/types.h>
111 #endif
112 #include <sys/stat.h>
113 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
114 #include <unistd.h>
115 #if defined(__WATCOMC__)
116 #include <sys/utime.h>
117 #else
118 #include <utime.h>
119 #endif
120 #else /* defined(MSDOS) */
121 #ifdef __WIN32__
122 #ifdef __BORLANDC__ /* BCC32 */
123 #include <utime.h>
124 #else /* !defined(__BORLANDC__) */
125 #include <sys/utime.h>
126 #endif /* (__BORLANDC__) */
127 #else /* !defined(__WIN32__) */
128 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
129 #include <sys/utime.h>
130 #elif defined(__TURBOC__) /* BCC */
131 #include <utime.h>
132 #elif defined(LSI_C) /* LSI C */
133 #endif /* (__WIN32__) */
134 #endif
135 #endif
136 #endif
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
190 /* ASCII CODE */
191
192 #define         BS      0x08
193 #define         TAB     0x09
194 #define         LF      0x0a
195 #define         CR      0x0d
196 #define         ESC     0x1b
197 #define         SP      0x20
198 #define         AT      0x40
199 #define         SSP     0xa0
200 #define         DEL     0x7f
201 #define         SI      0x0f
202 #define         SO      0x0e
203 #define         SSO     0x8e
204 #define         SS3     0x8f
205 #define         CRLF    0x0D0A
206
/*
 * Character classification helpers.
 *
 * Every macro parenthesizes its parameter so that an expression argument
 * (e.g. `a | b` or `x ? y : z`) is classified as a whole.  The argument may
 * still be evaluated more than once, so never pass an expression with side
 * effects.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* The portability of the library toupper() is not trusted here. */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hex digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)

#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END inclusive. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
232
233 #define         HOLD_SIZE       1024
234 #if defined(INT_IS_SHORT)
235 #define         IOBUF_SIZE      2048
236 #else
237 #define         IOBUF_SIZE      16384
238 #endif
239
240 #define         DEFAULT_J       'B'
241 #define         DEFAULT_R       'B'
242
243 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
244 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
245
246 #define         RANGE_NUM_MAX   18
247 #define         GETA1   0x22
248 #define         GETA2   0x2e
249
250
251 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
252 #define sizeof_euc_to_utf8_1byte 94
253 #define sizeof_euc_to_utf8_2bytes 94
254 #define sizeof_utf8_to_euc_C2 64
255 #define sizeof_utf8_to_euc_E5B8 64
256 #define sizeof_utf8_to_euc_2bytes 112
257 #define sizeof_utf8_to_euc_3bytes 16
258 #endif
259
260 /* MIME preprocessor */
261
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
264 #endif
265
266 struct input_code{
267     char *name;
268     nkf_char stat;
269     nkf_char score;
270     nkf_char index;
271     nkf_char buf[3];
272     void (*status_func)(struct input_code *, nkf_char);
273     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
274     int _file_stat;
275 };
276
277 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
278
279 #ifndef PERL_XS
280 static const char *CopyRight = COPY_RIGHT;
281 #endif
282 #if !defined(PERL_XS) && !defined(WIN32DLL)
283 static  nkf_char     noconvert(FILE *f);
284 #endif
285 static  void    module_connection(void);
286 static  nkf_char     kanji_convert(FILE *f);
287 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
288 static  nkf_char     push_hold_buf(nkf_char c2);
289 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
290 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
291 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
292 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
293 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
294 /* UCS Mapping
295  * 0: Shift_JIS, eucJP-ascii
296  * 1: eucJP-ms
297  * 2: CP932, CP51932
298  * 3: CP10001
299  */
300 #define UCS_MAP_ASCII   0
301 #define UCS_MAP_MS      1
302 #define UCS_MAP_CP932   2
303 #define UCS_MAP_CP10001 3
304 static int ms_ucs_map_f = UCS_MAP_ASCII;
305 #endif
306 #ifdef UTF8_INPUT_ENABLE
307 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
308 static  int     no_cp932ext_f = FALSE;
309 /* ignore ZERO WIDTH NO-BREAK SPACE */
310 static  int     no_best_fit_chars_f = FALSE;
311 static  int     input_endian = ENDIAN_BIG;
312 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
313 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
314 static  void    encode_fallback_html(nkf_char c);
315 static  void    encode_fallback_xml(nkf_char c);
316 static  void    encode_fallback_java(nkf_char c);
317 static  void    encode_fallback_perl(nkf_char c);
318 static  void    encode_fallback_subchar(nkf_char c);
319 static  void    (*encode_fallback)(nkf_char c) = NULL;
320 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
321 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
322 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
323 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
324 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
325 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
326 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
327 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
328 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
329 static  void    w_status(struct input_code *, nkf_char);
330 #endif
331 #ifdef UTF8_OUTPUT_ENABLE
332 static  int     output_bom_f = FALSE;
333 static  int     output_endian = ENDIAN_BIG;
334 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
335 static  void    w_oconv(nkf_char c2,nkf_char c1);
336 static  void    w_oconv16(nkf_char c2,nkf_char c1);
337 static  void    w_oconv32(nkf_char c2,nkf_char c1);
338 #endif
339 static  void    e_oconv(nkf_char c2,nkf_char c1);
340 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  void    s_oconv(nkf_char c2,nkf_char c1);
342 static  void    j_oconv(nkf_char c2,nkf_char c1);
343 static  void    fold_conv(nkf_char c2,nkf_char c1);
344 static  void    nl_conv(nkf_char c2,nkf_char c1);
345 static  void    z_conv(nkf_char c2,nkf_char c1);
346 static  void    rot_conv(nkf_char c2,nkf_char c1);
347 static  void    hira_conv(nkf_char c2,nkf_char c1);
348 static  void    base64_conv(nkf_char c2,nkf_char c1);
349 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
350 static  void    no_connection(nkf_char c2,nkf_char c1);
351 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
352
353 static  void    code_score(struct input_code *ptr);
354 static  void    code_status(nkf_char c);
355
356 static  void    std_putc(nkf_char c);
357 static  nkf_char     std_getc(FILE *f);
358 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
359
360 static  nkf_char     broken_getc(FILE *f);
361 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     mime_begin(FILE *f);
364 static  nkf_char     mime_getc(FILE *f);
365 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
366
367 static  void    switch_mime_getc(void);
368 static  void    unswitch_mime_getc(void);
369 static  nkf_char     mime_begin_strict(FILE *f);
370 static  nkf_char     mime_getc_buf(FILE *f);
371 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
372 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
373
374 static  nkf_char     base64decode(nkf_char c);
375 static  void    mime_prechar(nkf_char c2, nkf_char c1);
376 static  void    mime_putc(nkf_char c);
377 static  void    open_mime(nkf_char c);
378 static  void    close_mime(void);
379 static  void    eof_mime(void);
380 static  void    mimeout_addchar(nkf_char c);
381 #ifndef PERL_XS
382 static  void    usage(void);
383 static  void    version(void);
384 #endif
385 static  void    options(unsigned char *c);
386 static  void    reinit(void);
387
388 /* buffers */
389
390 #if !defined(PERL_XS) && !defined(WIN32DLL)
391 static unsigned char   stdibuf[IOBUF_SIZE];
392 static unsigned char   stdobuf[IOBUF_SIZE];
393 #endif
394 static unsigned char   hold_buf[HOLD_SIZE*2];
395 static int             hold_count = 0;
396
397 /* MIME preprocessor fifo */
398
399 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
400 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
401 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
402 static unsigned char           mime_buf[MIME_BUF_SIZE];
403 static unsigned int            mime_top = 0;
404 static unsigned int            mime_last = 0;  /* decoded */
405 static unsigned int            mime_input = 0; /* undecoded */
406 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
407
408 /* flags */
409 static int             unbuf_f = FALSE;
410 static int             estab_f = FALSE;
411 static int             nop_f = FALSE;
412 static int             binmode_f = TRUE;       /* binary mode */
413 static int             rot_f = FALSE;          /* rot14/43 mode */
414 static int             hira_f = FALSE;          /* hira/kata henkan */
415 static int             input_f = FALSE;        /* non fixed input code  */
416 static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
417 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
418 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
419 static int             mimebuf_f = FALSE;      /* MIME buffered input */
420 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
421 static int             iso8859_f = FALSE;      /* ISO8859 through */
422 static int             mimeout_f = FALSE;       /* base64 mode */
423 #if defined(MSDOS) || defined(__OS2__)
424 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
425 #else
426 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
427 #endif
428 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
429
#ifdef UNICODE_NORMALIZATION
/* Input hooks for the nfc_getc()/nfc_ungetc() layer; both start out
   pointing at the plain std_getc/std_ungetc routines. */
static int nfc_f = FALSE;
static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
static nkf_char nfc_getc(FILE *f);
static nkf_char nfc_ungetc(nkf_char c,FILE *f);
#endif
437
438 #ifdef INPUT_OPTION
439 static int cap_f = FALSE;
440 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
441 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
442 static nkf_char cap_getc(FILE *f);
443 static nkf_char cap_ungetc(nkf_char c,FILE *f);
444
445 static int url_f = FALSE;
446 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
447 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
448 static nkf_char url_getc(FILE *f);
449 static nkf_char url_ungetc(nkf_char c,FILE *f);
450 #endif
451
/*
 * NKF_INT32_C(n): spell a 32-bit constant.  When INT_IS_SHORT is defined
 * the literal gets an L suffix so that it is a long.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* True when the top byte of c (selected by CLASS_MASK) equals CLASS_UNICODE.
   The parameter is parenthesized so expression arguments are masked whole. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the low 24 bits of c (selected by VALUE_MASK) are <= 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
464
#ifdef NUMCHAR_OPTION
/* Input hooks for the numchar_getc()/numchar_ungetc() layer; both start out
   pointing at the plain std_getc/std_ungetc routines. */
static int numchar_f = FALSE;
static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of numchar_getc */
static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
static nkf_char numchar_getc(FILE *f);
static nkf_char numchar_ungetc(nkf_char c,FILE *f);
#endif
472
473 #ifdef CHECK_OPTION
474 static int noout_f = FALSE;
475 static void no_putc(nkf_char c);
476 static int debug_f = FALSE;
477 static void debug(const char *str);
478 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
479 #endif
480
481 static int guess_f = FALSE;
482 #if !defined PERL_XS
483 static  void    print_guessed_code(char *filename);
484 #endif
485 static  void    set_input_codename(char *codename);
486
487 #ifdef EXEC_IO
488 static int exec_f = 0;
489 #endif
490
491 #ifdef SHIFTJIS_CP932
492 /* invert IBM extended characters to others */
493 static int cp51932_f = FALSE;
494
495 /* invert NEC-selected IBM extended characters to IBM extended characters */
496 static int cp932inv_f = TRUE;
497
498 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
499 #endif /* SHIFTJIS_CP932 */
500
501 #ifdef X0212_ENABLE
502 static int x0212_f = FALSE;
503 static nkf_char x0212_shift(nkf_char c);
504 static nkf_char x0212_unshift(nkf_char c);
505 #endif
506 static int x0213_f = FALSE;
507
508 static unsigned char prefix_table[256];
509
510 static void set_code_score(struct input_code *ptr, nkf_char score);
511 static void clr_code_score(struct input_code *ptr, nkf_char score);
512 static void status_disable(struct input_code *ptr);
513 static void status_push_ch(struct input_code *ptr, nkf_char c);
514 static void status_clear(struct input_code *ptr);
515 static void status_reset(struct input_code *ptr);
516 static void status_reinit(struct input_code *ptr);
517 static void status_check(struct input_code *ptr, nkf_char c);
518 static void e_status(struct input_code *, nkf_char);
519 static void s_status(struct input_code *, nkf_char);
520
521 struct input_code input_code_list[] = {
522     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
523     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
524 #ifdef UTF8_INPUT_ENABLE
525     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
526     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
527     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
528 #endif
529     {0}
530 };
531
532 static int              mimeout_mode = 0;
533 static int              base64_count = 0;
534
535 /* X0208 -> ASCII converter */
536
537 /* fold parameter */
538 static int             f_line = 0;    /* chars in line */
539 static int             f_prev = 0;
540 static int             fold_preserve_f = FALSE; /* preserve new lines */
541 static int             fold_f  = FALSE;
542 static int             fold_len  = 0;
543
544 /* options */
545 static unsigned char   kanji_intro = DEFAULT_J;
546 static unsigned char   ascii_intro = DEFAULT_R;
547
548 /* Folding */
549
550 #define FOLD_MARGIN  10
551 #define DEFAULT_FOLD 60
552
553 static int             fold_margin  = FOLD_MARGIN;
554
555 /* converters */
556
557 #ifdef DEFAULT_CODE_JIS
558 #   define  DEFAULT_CONV j_oconv
559 #endif
560 #ifdef DEFAULT_CODE_SJIS
561 #   define  DEFAULT_CONV s_oconv
562 #endif
563 #ifdef DEFAULT_CODE_EUC
564 #   define  DEFAULT_CONV e_oconv
565 #endif
566 #ifdef DEFAULT_CODE_UTF8
567 #   define  DEFAULT_CONV w_oconv
568 #endif
569
570 /* process default */
571 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
572
573 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
574 /* s_iconv or oconv */
575 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
576
577 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
578 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
579 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
580 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
584
585 /* static redirections */
586
587 static  void   (*o_putc)(nkf_char c) = std_putc;
588
589 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
590 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
591
592 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
593 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
594
595 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
596
597 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
598 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
599
600 /* for strict mime */
601 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
602 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
603
604 /* Global states */
605 static int output_mode = ASCII,    /* output kanji mode */
606            input_mode =  ASCII,    /* input kanji mode */
607            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
608 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
609
610 /* X0201 / X0208 conversion tables */
611
/* X0201 kana conversion table */
/* 90-9F A0-DF */
/* Layout: 64 two-byte entries (4 entries per row) followed by a 0x00,0x00
   pair, 130 bytes in total. */
static const unsigned char cv[]= {
    0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
    0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
    0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
    0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
    0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
    0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
    0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
    0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
    0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
    0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
    0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
    0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
    0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
    0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
    0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
    0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
    0x00,0x00};
632
633
/* X0201 kana conversion table for dakuten */
/* 90-9F A0-DF */
/* Same layout as a 64-entry two-byte table plus a 0x00,0x00 pair (130
   bytes); a 0x00,0x00 entry means no value is defined for that slot. */
static const unsigned char dv[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
    0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
    0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
    0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
    0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
    0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
    0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
654
/* X0201 kana conversion table for han-dakuten */
/* 90-9F A0-DF */
/* Same layout as a 64-entry two-byte table plus a 0x00,0x00 pair (130
   bytes); only five slots carry a non-zero value. */
static const unsigned char ev[]= {
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
    0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00};
675
676
/* X0208 kigou (symbol) conversion table */
/* 0x8140 - 0x819e */
/* One byte per entry, 96 bytes in total; 0x00 means no value is defined
   for that slot. */
static const unsigned char fv[] = {

    0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
    0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
    0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
    0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
    0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
    0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
    0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
    0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
} ;
694
695
696
697 static int             file_out_f = FALSE;
698 #ifdef OVERWRITE
699 static int             overwrite_f = FALSE;
700 static int             preserve_time_f = FALSE;
701 static int             backup_f = FALSE;
702 static char            *backup_suffix = "";
703 static char *get_backup_filename(const char *suffix, const char *filename);
704 #endif
705
706 static int nlmode_f = 0;   /* CR, LF, CRLF */
707 static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
708 static nkf_char prev_cr = 0; /* CR or 0 */
709 #ifdef EASYWIN /*Easy Win */
710 static int             end_check;
711 #endif /*Easy Win */
712
713 #define STD_GC_BUFSIZE (256)
714 nkf_char std_gc_buf[STD_GC_BUFSIZE];
715 nkf_char std_gc_ndx;
716
717 #ifdef WIN32DLL
718 #include "nkf32dll.c"
719 #elif defined(PERL_XS)
720 #else /* WIN32DLL */
721 int main(int argc, char **argv)
722 {
723     FILE  *fin;
724     unsigned char  *cp;
725
726     char *outfname = NULL;
727     char *origfname;
728
729 #ifdef EASYWIN /*Easy Win */
730     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
731 #endif
732
733     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
734         cp = (unsigned char *)*argv;
735         options(cp);
736         if (guess_f) {
737 #ifdef CHECK_OPTION
738             int debug_f_back = debug_f;
739 #endif
740 #ifdef EXEC_IO
741             int exec_f_back = exec_f;
742 #endif
743 #ifdef X0212_ENABLE
744             int x0212_f_back = x0212_f;
745 #endif
746 #ifdef X0212_ENABLE
747             int x0213_f_back = x0213_f;
748 #endif
749             reinit();
750             guess_f = TRUE;
751             mime_f = FALSE;
752 #ifdef CHECK_OPTION
753             debug_f = debug_f_back;
754 #endif
755 #ifdef EXEC_IO
756             exec_f = exec_f_back;
757 #endif
758 #ifdef X0212_ENABLE
759             x0212_f = x0212_f_back;
760 #endif
761 #ifdef X0213_ENABLE
762             x0213_f = x0213_f_back;
763 #endif
764     }
765 #ifdef EXEC_IO
766         if (exec_f){
767             int fds[2], pid;
768             if (pipe(fds) < 0 || (pid = fork()) < 0){
769                 abort();
770             }
771             if (pid == 0){
772                 if (exec_f > 0){
773                     close(fds[0]);
774                     dup2(fds[1], 1);
775                 }else{
776                     close(fds[1]);
777                     dup2(fds[0], 0);
778                 }
779                 execvp(argv[1], &argv[1]);
780             }
781             if (exec_f > 0){
782                 close(fds[1]);
783                 dup2(fds[0], 0);
784             }else{
785                 close(fds[0]);
786                 dup2(fds[1], 1);
787             }
788             argc = 0;
789             break;
790         }
791 #endif
792     }
793     if(x0201_f == WISH_TRUE)
794          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
795
796     if (binmode_f == TRUE)
797 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
798     if (freopen("","wb",stdout) == NULL)
799         return (-1);
800 #else
801     setbinmode(stdout);
802 #endif
803
804     if (unbuf_f)
805       setbuf(stdout, (char *) NULL);
806     else
807       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
808
809     if (argc == 0) {
810       if (binmode_f == TRUE)
811 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
812       if (freopen("","rb",stdin) == NULL) return (-1);
813 #else
814       setbinmode(stdin);
815 #endif
816       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
817       if (nop_f)
818           noconvert(stdin);
819       else {
820           kanji_convert(stdin);
821           if (guess_f) print_guessed_code(NULL);
822       }
823     } else {
824       int nfiles = argc;
825         int is_argument_error = FALSE;
826       while (argc--) {
827             input_codename = NULL;
828             input_nextline = 0;
829 #ifdef CHECK_OPTION
830             iconv_for_check = 0;
831 #endif
832           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
833               perror(*--argv);
834                 *argv++;
835                 is_argument_error = TRUE;
836                 continue;
837           } else {
838 #ifdef OVERWRITE
839               int fd = 0;
840               int fd_backup = 0;
841 #endif
842
843 /* reopen file for stdout */
844               if (file_out_f == TRUE) {
845 #ifdef OVERWRITE
846                   if (overwrite_f){
847                       outfname = malloc(strlen(origfname)
848                                         + strlen(".nkftmpXXXXXX")
849                                         + 1);
850                       if (!outfname){
851                           perror(origfname);
852                           return -1;
853                       }
854                       strcpy(outfname, origfname);
855 #ifdef MSDOS
856                       {
857                           int i;
858                           for (i = strlen(outfname); i; --i){
859                               if (outfname[i - 1] == '/'
860                                   || outfname[i - 1] == '\\'){
861                                   break;
862                               }
863                           }
864                           outfname[i] = '\0';
865                       }
866                       strcat(outfname, "ntXXXXXX");
867                       mktemp(outfname);
868                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
869                                 S_IREAD | S_IWRITE);
870 #else
871                       strcat(outfname, ".nkftmpXXXXXX");
872                       fd = mkstemp(outfname);
873 #endif
874                       if (fd < 0
875                           || (fd_backup = dup(fileno(stdout))) < 0
876                           || dup2(fd, fileno(stdout)) < 0
877                           ){
878                           perror(origfname);
879                           return -1;
880                       }
881                   }else
882 #endif
883                   if(argc == 1) {
884                       outfname = *argv++;
885                       argc--;
886                   } else {
887                       outfname = "nkf.out";
888                   }
889
890                   if(freopen(outfname, "w", stdout) == NULL) {
891                       perror (outfname);
892                       return (-1);
893                   }
894                   if (binmode_f == TRUE) {
895 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
896                       if (freopen("","wb",stdout) == NULL)
897                            return (-1);
898 #else
899                       setbinmode(stdout);
900 #endif
901                   }
902               }
903               if (binmode_f == TRUE)
904 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
905                  if (freopen("","rb",fin) == NULL)
906                     return (-1);
907 #else
908                  setbinmode(fin);
909 #endif
910               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
911               if (nop_f)
912                   noconvert(fin);
913               else {
914                   char *filename = NULL;
915                   kanji_convert(fin);
916                   if (nfiles > 1) filename = origfname;
917                   if (guess_f) print_guessed_code(filename);
918               }
919               fclose(fin);
920 #ifdef OVERWRITE
921               if (overwrite_f) {
922                   struct stat     sb;
923 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
924                   time_t tb[2];
925 #else
926                   struct utimbuf  tb;
927 #endif
928
929                   fflush(stdout);
930                   close(fd);
931                   if (dup2(fd_backup, fileno(stdout)) < 0){
932                       perror("dup2");
933                   }
934                   if (stat(origfname, &sb)) {
935                       fprintf(stderr, "Can't stat %s\n", origfname);
936                   }
937                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
938                   if (chmod(outfname, sb.st_mode)) {
939                       fprintf(stderr, "Can't set permission %s\n", outfname);
940                   }
941
942                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
943                     if(preserve_time_f){
944 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
945                         tb[0] = tb[1] = sb.st_mtime;
946                         if (utime(outfname, tb)) {
947                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
948                         }
949 #else
950                         tb.actime  = sb.st_atime;
951                         tb.modtime = sb.st_mtime;
952                         if (utime(outfname, &tb)) {
953                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
954                         }
955 #endif
956                     }
957                     if(backup_f){
958                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
959 #ifdef MSDOS
960                         unlink(backup_filename);
961 #endif
962                         if (rename(origfname, backup_filename)) {
963                             perror(backup_filename);
964                             fprintf(stderr, "Can't rename %s to %s\n",
965                                     origfname, backup_filename);
966                         }
967                     }else{
968 #ifdef MSDOS
969                         if (unlink(origfname)){
970                             perror(origfname);
971                         }
972 #endif
973                     }
974                   if (rename(outfname, origfname)) {
975                       perror(origfname);
976                       fprintf(stderr, "Can't rename %s to %s\n",
977                               outfname, origfname);
978                   }
979                   free(outfname);
980               }
981 #endif
982           }
983       }
984         if (is_argument_error)
985             return(-1);
986     }
987 #ifdef EASYWIN /*Easy Win */
988     if (file_out_f == FALSE)
989         scanf("%d",&end_check);
990     else
991         fclose(stdout);
992 #else /* for Other OS */
993     if (file_out_f == TRUE)
994         fclose(stdout);
995 #endif /*Easy Win */
996     return (0);
997 }
998 #endif /* WIN32DLL */
999
1000 #ifdef OVERWRITE
/*
 * Build the backup file name for FILENAME from the template SUFFIX.
 *
 * If SUFFIX contains one or more '*' characters, every '*' is replaced by
 * FILENAME and all other characters are copied unchanged
 * (e.g. suffix "bk_*" and filename "foo" give "bk_foo").
 * Otherwise SUFFIX is simply appended to FILENAME
 * (e.g. suffix ".bak" and filename "foo" give "foo.bak").
 *
 * Returns a NUL-terminated string allocated with malloc(); the function
 * keeps no reference to it.  Returns NULL (after reporting through
 * perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    char *out;
    const char *s;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;

    for (s = suffix; *s; s++) {
        if (*s == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each one-byte '*' is replaced by filename_length bytes. */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }

        out = backup_filename;
        for (s = suffix; *s; s++) {
            if (*s == '*') {
                memcpy(out, filename, filename_length);
                out += filename_length;
            } else {
                *out++ = *s;
            }
        }
        *out = '\0';
    } else {
        /* Room for filename + suffix + terminating NUL. */
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminating NUL. */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1039 #endif
1040
/*
 * Table of the long ("--name") options recognised by options().
 *
 * options() scans this table in order and compares each `name` with the
 * text that follows "--".  A `name` ending in '=' denotes an option that
 * carries a value; the comparison stops at the '=' and the value is read
 * from the text after it.
 *
 * When `alias` is a non-empty string, options() processes that string of
 * short option letters in place of the long option.  When `alias` is "",
 * options() handles the option itself by comparing `name` with strcmp().
 *
 * Entries inside #ifdef blocks exist only when that feature macro is
 * defined at compile time.
 */
1041 static const struct {
1042     const char *name;   /* option text after "--"; trailing '=' means it takes a value */
1043     const char *alias;  /* equivalent short option letters, or "" if handled by name */
1044 } long_option[] = {
1045     {"ic=", ""},
1046     {"oc=", ""},
1047     {"base64","jMB"},
1048     {"euc","e"},
1049     {"euc-input","E"},
1050     {"fj","jm"},
1051     {"help","v"},
1052     {"jis","j"},
1053     {"jis-input","J"},
1054     {"mac","sLm"},
1055     {"mime","jM"},
1056     {"mime-input","m"},
1057     {"msdos","sLw"},
1058     {"sjis","s"},
1059     {"sjis-input","S"},
1060     {"unix","eLu"},
1061     {"version","V"},
1062     {"windows","sLw"},
1063     {"hiragana","h1"},
1064     {"katakana","h2"},
1065     {"katakana-hiragana","h3"},
1066     {"guess", "g"},
1067     {"cp932", ""},
1068     {"no-cp932", ""},
1069 #ifdef X0212_ENABLE
1070     {"x0212", ""},
1071 #endif
1072 #ifdef UTF8_OUTPUT_ENABLE
1073     {"utf8", "w"},
1074     {"utf16", "w16"},
1075     {"ms-ucs-map", ""},
1076     {"fb-skip", ""},
1077     {"fb-html", ""},
1078     {"fb-xml", ""},
1079     {"fb-perl", ""},
1080     {"fb-java", ""},
1081     {"fb-subchar", ""},
1082     {"fb-subchar=", ""},
1083 #endif
1084 #ifdef UTF8_INPUT_ENABLE
1085     {"utf8-input", "W"},
1086     {"utf16-input", "W16"},
1087     {"no-cp932ext", ""},
1088     {"no-best-fit-chars",""},
1089 #endif
1090 #ifdef UNICODE_NORMALIZATION
1091     {"utf8mac-input", ""},
1092 #endif
1093 #ifdef OVERWRITE
1094     {"overwrite", ""},
1095     {"overwrite=", ""},
1096     {"in-place", ""},
1097     {"in-place=", ""},
1098 #endif
1099 #ifdef INPUT_OPTION
1100     {"cap-input", ""},
1101     {"url-input", ""},
1102 #endif
1103 #ifdef NUMCHAR_OPTION
1104     {"numchar-input", ""},
1105 #endif
1106 #ifdef CHECK_OPTION
1107     {"no-output", ""},
1108     {"debug", ""},
1109 #endif
1110 #ifdef SHIFTJIS_CP932
1111     {"cp932inv", ""},
1112 #endif
1113 #ifdef EXEC_IO
1114     {"exec-in", ""},
1115     {"exec-out", ""},
1116 #endif
1117     {"prefix=", ""},
1118 };
1119
/*
 * Option-parsing state shared across calls to options().
 * options() sets this to 1 when it meets a bare "--" (followed by the end
 * of the string or a space); once it is 1, options() returns immediately
 * without parsing its argument.
 */
1120 static int option_mode = 0;
1121
1122 void options(unsigned char *cp)
1123 {
1124     nkf_char i, j;
1125     unsigned char *p;
1126     unsigned char *cp_back = NULL;
1127     char codeset[32];
1128
1129     if (option_mode==1)
1130         return;
1131     while(*cp && *cp++!='-');
1132     while (*cp || cp_back) {
1133         if(!*cp){
1134             cp = cp_back;
1135             cp_back = NULL;
1136             continue;
1137         }
1138         p = 0;
1139         switch (*cp++) {
1140         case '-':  /* literal options */
1141             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1142                 option_mode = 1;
1143                 return;
1144             }
1145             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1146                 p = (unsigned char *)long_option[i].name;
1147                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1148                 if (*p == cp[j] || cp[j] == SP){
1149                     p = &cp[j] + 1;
1150                     break;
1151                 }
1152                 p = 0;
1153             }
1154             if (p == 0) {
1155                 fprintf(stderr, "unknown long option: --%s\n", cp);
1156                 return;
1157             }
1158             while(*cp && *cp != SP && cp++);
1159             if (long_option[i].alias[0]){
1160                 cp_back = cp;
1161                 cp = (unsigned char *)long_option[i].alias;
1162             }else{
1163                 if (strcmp(long_option[i].name, "ic=") == 0){
1164                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1165                         codeset[i] = nkf_toupper(p[i]);
1166                     }
1167                     codeset[i] = 0;
1168                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1169                         input_f = JIS_INPUT;
1170                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1171                       strcmp(codeset, "CP50220") == 0 ||
1172                       strcmp(codeset, "CP50221") == 0 ||
1173                       strcmp(codeset, "CP50222") == 0){
1174                         input_f = JIS_INPUT;
1175 #ifdef SHIFTJIS_CP932
1176                         cp51932_f = TRUE;
1177 #endif
1178 #ifdef UTF8_OUTPUT_ENABLE
1179                         ms_ucs_map_f = UCS_MAP_CP932;
1180 #endif
1181                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1182                         input_f = JIS_INPUT;
1183 #ifdef X0212_ENABLE
1184                         x0212_f = TRUE;
1185 #endif
1186                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1187                         input_f = JIS_INPUT;
1188 #ifdef X0212_ENABLE
1189                         x0212_f = TRUE;
1190 #endif
1191                         x0213_f = TRUE;
1192                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1193                         input_f = SJIS_INPUT;
1194                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1195                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1196                              strcmp(codeset, "CP932") == 0 ||
1197                              strcmp(codeset, "MS932") == 0){
1198                         input_f = SJIS_INPUT;
1199 #ifdef SHIFTJIS_CP932
1200                         cp51932_f = TRUE;
1201 #endif
1202 #ifdef UTF8_OUTPUT_ENABLE
1203                         ms_ucs_map_f = UCS_MAP_CP932;
1204 #endif
1205                     }else if(strcmp(codeset, "CP10001") == 0){
1206                         input_f = SJIS_INPUT;
1207 #ifdef SHIFTJIS_CP932
1208                         cp51932_f = TRUE;
1209 #endif
1210 #ifdef UTF8_OUTPUT_ENABLE
1211                         ms_ucs_map_f = UCS_MAP_CP10001;
1212 #endif
1213                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1214                              strcmp(codeset, "EUC-JP") == 0){
1215                         input_f = EUC_INPUT;
1216                     }else if(strcmp(codeset, "CP51932") == 0){
1217                         input_f = EUC_INPUT;
1218 #ifdef SHIFTJIS_CP932
1219                         cp51932_f = TRUE;
1220 #endif
1221 #ifdef UTF8_OUTPUT_ENABLE
1222                         ms_ucs_map_f = UCS_MAP_CP932;
1223 #endif
1224                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1225                              strcmp(codeset, "EUCJP-MS") == 0 ||
1226                              strcmp(codeset, "EUCJPMS") == 0){
1227                         input_f = EUC_INPUT;
1228 #ifdef SHIFTJIS_CP932
1229                         cp51932_f = FALSE;
1230 #endif
1231 #ifdef UTF8_OUTPUT_ENABLE
1232                         ms_ucs_map_f = UCS_MAP_MS;
1233 #endif
1234                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1235                              strcmp(codeset, "EUCJP-ASCII") == 0){
1236                         input_f = EUC_INPUT;
1237 #ifdef SHIFTJIS_CP932
1238                         cp51932_f = FALSE;
1239 #endif
1240 #ifdef UTF8_OUTPUT_ENABLE
1241                         ms_ucs_map_f = UCS_MAP_ASCII;
1242 #endif
1243                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1244                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1245                         input_f = SJIS_INPUT;
1246                         x0213_f = TRUE;
1247 #ifdef SHIFTJIS_CP932
1248                         cp51932_f = FALSE;
1249 #endif
1250                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1251                              strcmp(codeset, "EUC-JIS-2004") == 0){
1252                         input_f = EUC_INPUT;
1253                         x0213_f = TRUE;
1254 #ifdef SHIFTJIS_CP932
1255                         cp51932_f = FALSE;
1256 #endif
1257 #ifdef UTF8_INPUT_ENABLE
1258                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1259                              strcmp(codeset, "UTF-8N") == 0 ||
1260                              strcmp(codeset, "UTF-8-BOM") == 0){
1261                         input_f = UTF8_INPUT;
1262 #ifdef UNICODE_NORMALIZATION
1263                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1264                              strcmp(codeset, "UTF-8-MAC") == 0){
1265                         input_f = UTF8_INPUT;
1266                         nfc_f = TRUE;
1267 #endif
1268                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1269                              strcmp(codeset, "UTF-16BE") == 0 ||
1270                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1271                         input_f = UTF16_INPUT;
1272                         input_endian = ENDIAN_BIG;
1273                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1274                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1275                         input_f = UTF16_INPUT;
1276                         input_endian = ENDIAN_LITTLE;
1277                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1278                              strcmp(codeset, "UTF-32BE") == 0 ||
1279                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1280                         input_f = UTF32_INPUT;
1281                         input_endian = ENDIAN_BIG;
1282                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1283                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1284                         input_f = UTF32_INPUT;
1285                         input_endian = ENDIAN_LITTLE;
1286 #endif
1287                     } else {
1288                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1289                     }
1290                     continue;
1291                 }
1292                 if (strcmp(long_option[i].name, "oc=") == 0){
1293                     x0201_f = FALSE;
1294                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1295                         codeset[i] = nkf_toupper(p[i]);
1296                     }
1297                     codeset[i] = 0;
1298                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1299                         output_conv = j_oconv;
1300                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1301                         output_conv = j_oconv;
1302                         no_cp932ext_f = TRUE;
1303 #ifdef SHIFTJIS_CP932
1304                         cp932inv_f = FALSE;
1305 #endif
1306 #ifdef UTF8_OUTPUT_ENABLE
1307                         ms_ucs_map_f = UCS_MAP_CP932;
1308 #endif
1309                     }else if(strcmp(codeset, "CP50220") == 0){
1310                         output_conv = j_oconv;
1311                         x0201_f = TRUE;
1312 #ifdef SHIFTJIS_CP932
1313                         cp932inv_f = FALSE;
1314 #endif
1315 #ifdef UTF8_OUTPUT_ENABLE
1316                         ms_ucs_map_f = UCS_MAP_CP932;
1317 #endif
1318                     }else if(strcmp(codeset, "CP50221") == 0){
1319                         output_conv = j_oconv;
1320 #ifdef SHIFTJIS_CP932
1321                         cp932inv_f = FALSE;
1322 #endif
1323 #ifdef UTF8_OUTPUT_ENABLE
1324                         ms_ucs_map_f = UCS_MAP_CP932;
1325 #endif
1326                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1327                         output_conv = j_oconv;
1328 #ifdef X0212_ENABLE
1329                         x0212_f = TRUE;
1330 #endif
1331 #ifdef SHIFTJIS_CP932
1332                         cp932inv_f = FALSE;
1333 #endif
1334                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1335                         output_conv = j_oconv;
1336 #ifdef X0212_ENABLE
1337                         x0212_f = TRUE;
1338 #endif
1339                         x0213_f = TRUE;
1340 #ifdef SHIFTJIS_CP932
1341                         cp932inv_f = FALSE;
1342 #endif
1343                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1344                         output_conv = s_oconv;
1345                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1346                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1347                              strcmp(codeset, "CP932") == 0 ||
1348                              strcmp(codeset, "MS932") == 0){
1349                         output_conv = s_oconv;
1350 #ifdef UTF8_OUTPUT_ENABLE
1351                         ms_ucs_map_f = UCS_MAP_CP932;
1352 #endif
1353                     }else if(strcmp(codeset, "CP10001") == 0){
1354                         output_conv = s_oconv;
1355 #ifdef UTF8_OUTPUT_ENABLE
1356                         ms_ucs_map_f = UCS_MAP_CP10001;
1357 #endif
1358                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1359                              strcmp(codeset, "EUC-JP") == 0){
1360                         output_conv = e_oconv;
1361                     }else if(strcmp(codeset, "CP51932") == 0){
1362                         output_conv = e_oconv;
1363 #ifdef SHIFTJIS_CP932
1364                         cp932inv_f = FALSE;
1365 #endif
1366 #ifdef UTF8_OUTPUT_ENABLE
1367                         ms_ucs_map_f = UCS_MAP_CP932;
1368 #endif
1369                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1370                              strcmp(codeset, "EUCJP-MS") == 0 ||
1371                              strcmp(codeset, "EUCJPMS") == 0){
1372                         output_conv = e_oconv;
1373 #ifdef X0212_ENABLE
1374                         x0212_f = TRUE;
1375 #endif
1376 #ifdef UTF8_OUTPUT_ENABLE
1377                         ms_ucs_map_f = UCS_MAP_MS;
1378 #endif
1379                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1380                              strcmp(codeset, "EUCJP-ASCII") == 0){
1381                         output_conv = e_oconv;
1382 #ifdef X0212_ENABLE
1383                         x0212_f = TRUE;
1384 #endif
1385 #ifdef UTF8_OUTPUT_ENABLE
1386                         ms_ucs_map_f = UCS_MAP_ASCII;
1387 #endif
1388                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1389                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1390                         output_conv = s_oconv;
1391                         x0213_f = TRUE;
1392 #ifdef SHIFTJIS_CP932
1393                         cp932inv_f = FALSE;
1394 #endif
1395                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1396                              strcmp(codeset, "EUC-JIS-2004") == 0){
1397                         output_conv = e_oconv;
1398 #ifdef X0212_ENABLE
1399                         x0212_f = TRUE;
1400 #endif
1401                         x0213_f = TRUE;
1402 #ifdef SHIFTJIS_CP932
1403                         cp932inv_f = FALSE;
1404 #endif
1405 #ifdef UTF8_OUTPUT_ENABLE
1406                     }else if(strcmp(codeset, "UTF-8") == 0){
1407                         output_conv = w_oconv;
1408                     }else if(strcmp(codeset, "UTF-8N") == 0){
1409                         output_conv = w_oconv;
1410                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1411                         output_conv = w_oconv;
1412                         output_bom_f = TRUE;
1413                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1414                         output_conv = w_oconv16;
1415                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1416                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1417                         output_conv = w_oconv16;
1418                         output_bom_f = TRUE;
1419                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1420                         output_conv = w_oconv16;
1421                         output_endian = ENDIAN_LITTLE;
1422                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1423                         output_conv = w_oconv16;
1424                         output_endian = ENDIAN_LITTLE;
1425                         output_bom_f = TRUE;
1426                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1427                              strcmp(codeset, "UTF-32BE") == 0){
1428                         output_conv = w_oconv32;
1429                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1430                         output_conv = w_oconv32;
1431                         output_bom_f = TRUE;
1432                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1433                         output_conv = w_oconv32;
1434                         output_endian = ENDIAN_LITTLE;
1435                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1436                         output_conv = w_oconv32;
1437                         output_endian = ENDIAN_LITTLE;
1438                         output_bom_f = TRUE;
1439 #endif
1440                     } else {
1441                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1442                     }
1443                     continue;
1444                 }
1445 #ifdef OVERWRITE
1446                 if (strcmp(long_option[i].name, "overwrite") == 0){
1447                     file_out_f = TRUE;
1448                     overwrite_f = TRUE;
1449                     preserve_time_f = TRUE;
1450                     continue;
1451                 }
1452                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1453                     file_out_f = TRUE;
1454                     overwrite_f = TRUE;
1455                     preserve_time_f = TRUE;
1456                     backup_f = TRUE;
1457                     backup_suffix = malloc(strlen((char *) p) + 1);
1458                     strcpy(backup_suffix, (char *) p);
1459                     continue;
1460                 }
1461                 if (strcmp(long_option[i].name, "in-place") == 0){
1462                     file_out_f = TRUE;
1463                     overwrite_f = TRUE;
1464                     preserve_time_f = FALSE;
1465                     continue;
1466                 }
1467                 if (strcmp(long_option[i].name, "in-place=") == 0){
1468                     file_out_f = TRUE;
1469                     overwrite_f = TRUE;
1470                     preserve_time_f = FALSE;
1471                     backup_f = TRUE;
1472                     backup_suffix = malloc(strlen((char *) p) + 1);
1473                     strcpy(backup_suffix, (char *) p);
1474                     continue;
1475                 }
1476 #endif
1477 #ifdef INPUT_OPTION
1478                 if (strcmp(long_option[i].name, "cap-input") == 0){
1479                     cap_f = TRUE;
1480                     continue;
1481                 }
1482                 if (strcmp(long_option[i].name, "url-input") == 0){
1483                     url_f = TRUE;
1484                     continue;
1485                 }
1486 #endif
1487 #ifdef NUMCHAR_OPTION
1488                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1489                     numchar_f = TRUE;
1490                     continue;
1491                 }
1492 #endif
1493 #ifdef CHECK_OPTION
1494                 if (strcmp(long_option[i].name, "no-output") == 0){
1495                     noout_f = TRUE;
1496                     continue;
1497                 }
1498                 if (strcmp(long_option[i].name, "debug") == 0){
1499                     debug_f = TRUE;
1500                     continue;
1501                 }
1502 #endif
1503                 if (strcmp(long_option[i].name, "cp932") == 0){
1504 #ifdef SHIFTJIS_CP932
1505                     cp51932_f = TRUE;
1506                     cp932inv_f = TRUE;
1507 #endif
1508 #ifdef UTF8_OUTPUT_ENABLE
1509                     ms_ucs_map_f = UCS_MAP_CP932;
1510 #endif
1511                     continue;
1512                 }
1513                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1514 #ifdef SHIFTJIS_CP932
1515                     cp51932_f = FALSE;
1516                     cp932inv_f = FALSE;
1517 #endif
1518 #ifdef UTF8_OUTPUT_ENABLE
1519                     ms_ucs_map_f = UCS_MAP_ASCII;
1520 #endif
1521                     continue;
1522                 }
1523 #ifdef SHIFTJIS_CP932
1524                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1525                     cp932inv_f = TRUE;
1526                     continue;
1527                 }
1528 #endif
1529
1530 #ifdef X0212_ENABLE
1531                 if (strcmp(long_option[i].name, "x0212") == 0){
1532                     x0212_f = TRUE;
1533                     continue;
1534                 }
1535 #endif
1536
1537 #ifdef EXEC_IO
1538                   if (strcmp(long_option[i].name, "exec-in") == 0){
1539                       exec_f = 1;
1540                       return;
1541                   }
1542                   if (strcmp(long_option[i].name, "exec-out") == 0){
1543                       exec_f = -1;
1544                       return;
1545                   }
1546 #endif
1547 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1548                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1549                     no_cp932ext_f = TRUE;
1550                     continue;
1551                 }
1552                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1553                     no_best_fit_chars_f = TRUE;
1554                     continue;
1555                 }
1556                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1557                     encode_fallback = NULL;
1558                     continue;
1559                 }
1560                 if (strcmp(long_option[i].name, "fb-html") == 0){
1561                     encode_fallback = encode_fallback_html;
1562                     continue;
1563                 }
1564                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1565                     encode_fallback = encode_fallback_xml;
1566                     continue;
1567                 }
1568                 if (strcmp(long_option[i].name, "fb-java") == 0){
1569                     encode_fallback = encode_fallback_java;
1570                     continue;
1571                 }
1572                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1573                     encode_fallback = encode_fallback_perl;
1574                     continue;
1575                 }
1576                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1577                     encode_fallback = encode_fallback_subchar;
1578                     continue;
1579                 }
1580                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1581                     encode_fallback = encode_fallback_subchar;
1582                     unicode_subchar = 0;
1583                     if (p[0] != '0'){
1584                         /* decimal number */
1585                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1586                             unicode_subchar *= 10;
1587                             unicode_subchar += hex2bin(p[i]);
1588                         }
1589                     }else if(p[1] == 'x' || p[1] == 'X'){
1590                         /* hexadecimal number */
1591                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1592                             unicode_subchar <<= 4;
1593                             unicode_subchar |= hex2bin(p[i]);
1594                         }
1595                     }else{
1596                         /* octal number */
1597                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1598                             unicode_subchar *= 8;
1599                             unicode_subchar += hex2bin(p[i]);
1600                         }
1601                     }
1602                     w16e_conv(unicode_subchar, &i, &j);
1603                     unicode_subchar = i<<8 | j;
1604                     continue;
1605                 }
1606 #endif
1607 #ifdef UTF8_OUTPUT_ENABLE
1608                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1609                     ms_ucs_map_f = UCS_MAP_MS;
1610                     continue;
1611                 }
1612 #endif
1613 #ifdef UNICODE_NORMALIZATION
1614                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1615                     input_f = UTF8_INPUT;
1616                     nfc_f = TRUE;
1617                     continue;
1618                 }
1619 #endif
1620                 if (strcmp(long_option[i].name, "prefix=") == 0){
1621                     if (nkf_isgraph(p[0])){
1622                         for (i = 1; nkf_isgraph(p[i]); i++){
1623                             prefix_table[p[i]] = p[0];
1624                         }
1625                     }
1626                     continue;
1627                 }
1628             }
1629             continue;
1630         case 'b':           /* buffered mode */
1631             unbuf_f = FALSE;
1632             continue;
1633         case 'u':           /* non bufferd mode */
1634             unbuf_f = TRUE;
1635             continue;
1636         case 't':           /* transparent mode */
1637             if (*cp=='1') {
1638                 /* alias of -t */
1639                 nop_f = TRUE;
1640                 *cp++;
1641             } else if (*cp=='2') {
1642                 /*
1643                  * -t with put/get
1644                  *
1645                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1646                  *
1647                  */
1648                 nop_f = 2;
1649                 *cp++;
1650             } else
1651                 nop_f = TRUE;
1652             continue;
1653         case 'j':           /* JIS output */
1654         case 'n':
1655             output_conv = j_oconv;
1656             continue;
1657         case 'e':           /* AT&T EUC output */
1658             output_conv = e_oconv;
1659             cp932inv_f = FALSE;
1660             continue;
1661         case 's':           /* SJIS output */
1662             output_conv = s_oconv;
1663             continue;
1664         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1665             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1666             input_f = LATIN1_INPUT;
1667             continue;
1668         case 'i':           /* Kanji IN ESC-$-@/B */
1669             if (*cp=='@'||*cp=='B')
1670                 kanji_intro = *cp++;
1671             continue;
1672         case 'o':           /* ASCII IN ESC-(-J/B */
1673             if (*cp=='J'||*cp=='B'||*cp=='H')
1674                 ascii_intro = *cp++;
1675             continue;
1676         case 'h':
1677             /*
1678                 bit:1   katakana->hiragana
1679                 bit:2   hiragana->katakana
1680             */
1681             if ('9'>= *cp && *cp>='0')
1682                 hira_f |= (*cp++ -'0');
1683             else
1684                 hira_f |= 1;
1685             continue;
1686         case 'r':
1687             rot_f = TRUE;
1688             continue;
1689 #if defined(MSDOS) || defined(__OS2__)
1690         case 'T':
1691             binmode_f = FALSE;
1692             continue;
1693 #endif
1694 #ifndef PERL_XS
1695         case 'V':
1696             version();
1697             exit(1);
1698             break;
1699         case 'v':
1700             usage();
1701             exit(1);
1702             break;
1703 #endif
1704 #ifdef UTF8_OUTPUT_ENABLE
1705         case 'w':           /* UTF-8 output */
1706             if (cp[0] == '8') {
1707                 output_conv = w_oconv; cp++;
1708                 if (cp[0] == '0'){
1709                     cp++;
1710                 } else {
1711                     output_bom_f = TRUE;
1712                 }
1713             } else {
1714                 if ('1'== cp[0] && '6'==cp[1]) {
1715                     output_conv = w_oconv16; cp+=2;
1716                 } else if ('3'== cp[0] && '2'==cp[1]) {
1717                     output_conv = w_oconv32; cp+=2;
1718                 } else {
1719                     output_conv = w_oconv;
1720                     continue;
1721                 }
1722                 if (cp[0]=='L') {
1723                     cp++;
1724                     output_endian = ENDIAN_LITTLE;
1725                 } else if (cp[0] == 'B') {
1726                     cp++;
1727                 } else {
1728                     continue;
1729                 }
1730                 if (cp[0] == '0'){
1731                     cp++;
1732                 } else {
1733                     output_bom_f = TRUE;
1734                 }
1735             }
1736             continue;
1737 #endif
1738 #ifdef UTF8_INPUT_ENABLE
1739         case 'W':           /* UTF input */
1740             if (cp[0] == '8') {
1741                 cp++;
1742                 input_f = UTF8_INPUT;
1743             }else{
1744                 if ('1'== cp[0] && '6'==cp[1]) {
1745                     cp += 2;
1746                     input_f = UTF16_INPUT;
1747                     input_endian = ENDIAN_BIG;
1748                 } else if ('3'== cp[0] && '2'==cp[1]) {
1749                     cp += 2;
1750                     input_f = UTF32_INPUT;
1751                     input_endian = ENDIAN_BIG;
1752                 } else {
1753                     input_f = UTF8_INPUT;
1754                     continue;
1755                 }
1756                 if (cp[0]=='L') {
1757                     cp++;
1758                     input_endian = ENDIAN_LITTLE;
1759                 } else if (cp[0] == 'B') {
1760                     cp++;
1761                 }
1762             }
1763             continue;
1764 #endif
1765         /* Input code assumption */
1766         case 'J':   /* JIS input */
1767             input_f = JIS_INPUT;
1768             continue;
1769         case 'E':   /* AT&T EUC input */
1770             input_f = EUC_INPUT;
1771             continue;
1772         case 'S':   /* MS Kanji input */
1773             input_f = SJIS_INPUT;
1774             if (x0201_f==NO_X0201) x0201_f=TRUE;
1775             continue;
1776         case 'Z':   /* Convert X0208 alphabet to asii */
1777             /* alpha_f
1778                bit:0   Convert JIS X 0208 Alphabet to ASCII
1779                bit:1   Convert Kankaku to one space
1780                bit:2   Convert Kankaku to two spaces
1781                bit:3   Convert HTML Entity
1782                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1783             */
1784             while ('0'<= *cp && *cp <='9') {
1785                 alpha_f |= 1 << (*cp++ - '0');
1786             }
1787             if (!alpha_f) alpha_f = 1;
1788             continue;
1789         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1790             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1791             /* accept  X0201
1792                     ESC-(-I     in JIS, EUC, MS Kanji
1793                     SI/SO       in JIS, EUC, MS Kanji
1794                     SSO         in EUC, JIS, not in MS Kanji
1795                     MS Kanji (0xa0-0xdf)
1796                output  X0201
1797                     ESC-(-I     in JIS (0x20-0x5f)
1798                     SSO         in EUC (0xa0-0xdf)
1799                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1800             */
1801             continue;
1802         case 'X':   /* Assume X0201 kana */
1803             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1804             x0201_f = TRUE;
1805             continue;
1806         case 'F':   /* prserve new lines */
1807             fold_preserve_f = TRUE;
1808         case 'f':   /* folding -f60 or -f */
1809             fold_f = TRUE;
1810             fold_len = 0;
1811             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1812                 fold_len *= 10;
1813                 fold_len += *cp++ - '0';
1814             }
1815             if (!(0<fold_len && fold_len<BUFSIZ))
1816                 fold_len = DEFAULT_FOLD;
1817             if (*cp=='-') {
1818                 fold_margin = 0;
1819                 cp++;
1820                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1821                     fold_margin *= 10;
1822                     fold_margin += *cp++ - '0';
1823                 }
1824             }
1825             continue;
1826         case 'm':   /* MIME support */
1827             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1828             if (*cp=='B'||*cp=='Q') {
1829                 mime_decode_mode = *cp++;
1830                 mimebuf_f = FIXED_MIME;
1831             } else if (*cp=='N') {
1832                 mime_f = TRUE; cp++;
1833             } else if (*cp=='S') {
1834                 mime_f = STRICT_MIME; cp++;
1835             } else if (*cp=='0') {
1836                 mime_decode_f = FALSE;
1837                 mime_f = FALSE; cp++;
1838             }
1839             continue;
1840         case 'M':   /* MIME output */
1841             if (*cp=='B') {
1842                 mimeout_mode = 'B';
1843                 mimeout_f = FIXED_MIME; cp++;
1844             } else if (*cp=='Q') {
1845                 mimeout_mode = 'Q';
1846                 mimeout_f = FIXED_MIME; cp++;
1847             } else {
1848                 mimeout_f = TRUE;
1849             }
1850             continue;
1851         case 'B':   /* Broken JIS support */
1852             /*  bit:0   no ESC JIS
1853                 bit:1   allow any x on ESC-(-x or ESC-$-x
1854                 bit:2   reset to ascii on NL
1855             */
1856             if ('9'>= *cp && *cp>='0')
1857                 broken_f |= 1<<(*cp++ -'0');
1858             else
1859                 broken_f |= TRUE;
1860             continue;
1861 #ifndef PERL_XS
1862         case 'O':/* for Output file */
1863             file_out_f = TRUE;
1864             continue;
1865 #endif
1866         case 'c':/* add cr code */
1867             nlmode_f = CRLF;
1868             continue;
1869         case 'd':/* delete cr code */
1870             nlmode_f = LF;
1871             continue;
1872         case 'I':   /* ISO-2022-JP output */
1873             iso2022jp_f = TRUE;
1874             continue;
1875         case 'L':  /* line mode */
1876             if (*cp=='u') {         /* unix */
1877                 nlmode_f = LF; cp++;
1878             } else if (*cp=='m') { /* mac */
1879                 nlmode_f = CR; cp++;
1880             } else if (*cp=='w') { /* windows */
1881                 nlmode_f = CRLF; cp++;
1882             } else if (*cp=='0') { /* no conversion  */
1883                 nlmode_f = 0; cp++;
1884             }
1885             continue;
1886 #ifndef PERL_XS
1887         case 'g':
1888             guess_f = TRUE;
1889             continue;
1890 #endif
1891         case SP:
1892         /* module muliple options in a string are allowed for Perl moudle  */
1893             while(*cp && *cp++!='-');
1894             continue;
1895         default:
1896             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1897             /* bogus option but ignored */
1898             continue;
1899         }
1900     }
1901 }
1902
1903 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1904 {
1905     if (iconv_func){
1906         struct input_code *p = input_code_list;
1907         while (p->name){
1908             if (iconv_func == p->iconv_func){
1909                 return p;
1910             }
1911             p++;
1912         }
1913     }
1914     return 0;
1915 }
1916
1917 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1918 {
1919 #ifdef INPUT_CODE_FIX
1920     if (f || !input_f)
1921 #endif
1922         if (estab_f != f){
1923             estab_f = f;
1924         }
1925
1926     if (iconv_func
1927 #ifdef INPUT_CODE_FIX
1928         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1929 #endif
1930         ){
1931         iconv = iconv_func;
1932     }
1933 #ifdef CHECK_OPTION
1934     if (estab_f && iconv_for_check != iconv){
1935         struct input_code *p = find_inputcode_byfunc(iconv);
1936         if (p){
1937             set_input_codename(p->name);
1938             debug(p->name);
1939         }
1940         iconv_for_check = iconv;
1941     }
1942 #endif
1943 }
1944
/*
 * Score bits OR-ed into input_code.score by set_code_score().
 * Each flag occupies its own bit, from bit 0 upward.
 */
#define SCORE_L2       (1)        /* JIS level 2 kanji */
#define SCORE_KANA     (1 << 1)   /* so-called half-width kana */
#define SCORE_DEPEND   (1 << 2)   /* machine-dependent characters */
#define SCORE_CP932    (1 << 3)   /* reinterpretation by CP932 (IBM extended characters) */
#define SCORE_NO_EXIST (1 << 4)   /* characters that do not exist */
#define SCORE_iMIME    (1 << 5)   /* specified by MIME */
#define SCORE_ERROR    (1 << 6)   /* error */

/* score a candidate is reset to by status_reset() */
#define SCORE_INIT (SCORE_iMIME)
1954
1955 static const char score_table_A0[] = {
1956     0, 0, 0, 0,
1957     0, 0, 0, 0,
1958     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1959     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1960 };
1961
1962 static const char score_table_F0[] = {
1963     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1964     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1965     SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
1966     SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1967 };
1968
1969 void set_code_score(struct input_code *ptr, nkf_char score)
1970 {
1971     if (ptr){
1972         ptr->score |= score;
1973     }
1974 }
1975
1976 void clr_code_score(struct input_code *ptr, nkf_char score)
1977 {
1978     if (ptr){
1979         ptr->score &= ~score;
1980     }
1981 }
1982
1983 void code_score(struct input_code *ptr)
1984 {
1985     nkf_char c2 = ptr->buf[0];
1986 #ifdef UTF8_OUTPUT_ENABLE
1987     nkf_char c1 = ptr->buf[1];
1988 #endif
1989     if (c2 < 0){
1990         set_code_score(ptr, SCORE_ERROR);
1991     }else if (c2 == SSO){
1992         set_code_score(ptr, SCORE_KANA);
1993 #ifdef UTF8_OUTPUT_ENABLE
1994     }else if (!e2w_conv(c2, c1)){
1995         set_code_score(ptr, SCORE_NO_EXIST);
1996 #endif
1997     }else if ((c2 & 0x70) == 0x20){
1998         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1999     }else if ((c2 & 0x70) == 0x70){
2000         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2001     }else if ((c2 & 0x70) >= 0x50){
2002         set_code_score(ptr, SCORE_L2);
2003     }
2004 }
2005
2006 void status_disable(struct input_code *ptr)
2007 {
2008     ptr->stat = -1;
2009     ptr->buf[0] = -1;
2010     code_score(ptr);
2011     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2012 }
2013
2014 void status_push_ch(struct input_code *ptr, nkf_char c)
2015 {
2016     ptr->buf[ptr->index++] = c;
2017 }
2018
2019 void status_clear(struct input_code *ptr)
2020 {
2021     ptr->stat = 0;
2022     ptr->index = 0;
2023 }
2024
2025 void status_reset(struct input_code *ptr)
2026 {
2027     status_clear(ptr);
2028     ptr->score = SCORE_INIT;
2029 }
2030
2031 void status_reinit(struct input_code *ptr)
2032 {
2033     status_reset(ptr);
2034     ptr->_file_stat = 0;
2035 }
2036
2037 void status_check(struct input_code *ptr, nkf_char c)
2038 {
2039     if (c <= DEL && estab_f){
2040         status_reset(ptr);
2041     }
2042 }
2043
2044 void s_status(struct input_code *ptr, nkf_char c)
2045 {
2046     switch(ptr->stat){
2047       case -1:
2048           status_check(ptr, c);
2049           break;
2050       case 0:
2051           if (c <= DEL){
2052               break;
2053 #ifdef NUMCHAR_OPTION
2054           }else if (is_unicode_capsule(c)){
2055               break;
2056 #endif
2057           }else if (0xa1 <= c && c <= 0xdf){
2058               status_push_ch(ptr, SSO);
2059               status_push_ch(ptr, c);
2060               code_score(ptr);
2061               status_clear(ptr);
2062           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2063               ptr->stat = 1;
2064               status_push_ch(ptr, c);
2065 #ifdef SHIFTJIS_CP932
2066           }else if (is_ibmext_in_sjis(c)){
2067               ptr->stat = 2;
2068               status_push_ch(ptr, c);
2069 #endif /* SHIFTJIS_CP932 */
2070 #ifdef X0212_ENABLE
2071           }else if (0xf0 <= c && c <= 0xfc){
2072               ptr->stat = 1;
2073               status_push_ch(ptr, c);
2074 #endif /* X0212_ENABLE */
2075           }else{
2076               status_disable(ptr);
2077           }
2078           break;
2079       case 1:
2080           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2081               status_push_ch(ptr, c);
2082               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2083               code_score(ptr);
2084               status_clear(ptr);
2085           }else{
2086               status_disable(ptr);
2087           }
2088           break;
2089       case 2:
2090 #ifdef SHIFTJIS_CP932
2091         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2092             status_push_ch(ptr, c);
2093             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2094                 code_score(ptr);
2095                 status_clear(ptr);
2096                 break;
2097             }
2098         }
2099 #endif /* SHIFTJIS_CP932 */
2100         status_disable(ptr);
2101           break;
2102     }
2103 }
2104
2105 void e_status(struct input_code *ptr, nkf_char c)
2106 {
2107     switch (ptr->stat){
2108       case -1:
2109           status_check(ptr, c);
2110           break;
2111       case 0:
2112           if (c <= DEL){
2113               break;
2114 #ifdef NUMCHAR_OPTION
2115           }else if (is_unicode_capsule(c)){
2116               break;
2117 #endif
2118           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2119               ptr->stat = 1;
2120               status_push_ch(ptr, c);
2121 #ifdef X0212_ENABLE
2122           }else if (0x8f == c){
2123               ptr->stat = 2;
2124               status_push_ch(ptr, c);
2125 #endif /* X0212_ENABLE */
2126           }else{
2127               status_disable(ptr);
2128           }
2129           break;
2130       case 1:
2131           if (0xa1 <= c && c <= 0xfe){
2132               status_push_ch(ptr, c);
2133               code_score(ptr);
2134               status_clear(ptr);
2135           }else{
2136               status_disable(ptr);
2137           }
2138           break;
2139 #ifdef X0212_ENABLE
2140       case 2:
2141           if (0xa1 <= c && c <= 0xfe){
2142               ptr->stat = 1;
2143               status_push_ch(ptr, c);
2144           }else{
2145               status_disable(ptr);
2146           }
2147 #endif /* X0212_ENABLE */
2148     }
2149 }
2150
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detection step: feed one input byte c to the candidate.
 *
 * ptr->stat values used here:
 *   -1       candidate disabled
 *    0       idle, c is a first byte
 *    1 / 2   lead byte 0xc0..0xdf / 0xe0..0xef is buffered; the sequence
 *            is complete once more than stat bytes are buffered
 *    3       lead byte 0xf0..0xf4 is buffered; trailing bytes are
 *            collected and the sequence is dropped without scoring
 *
 * Every trailing byte must lie in 0x80..0xbf, otherwise the candidate is
 * disabled.  The sequence EF BB BF is converted but not scored.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;
      case 0:
          if (c <= DEL){
              break;
          }
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c)){
              break;
          }
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;
      case 1:
      case 2:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index > ptr->stat){
              int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                         && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!bom){
                  code_score(ptr);
              }
              status_clear(ptr);
          }
          break;
      case 3:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          } else {
              /* last trailing byte: nothing is scored, just go idle */
              status_clear(ptr);
          }
          break;
    }
}
#endif
2210
2211 void code_status(nkf_char c)
2212 {
2213     int action_flag = 1;
2214     struct input_code *result = 0;
2215     struct input_code *p = input_code_list;
2216     while (p->name){
2217         if (!p->status_func) {
2218             ++p;
2219             continue;
2220         }
2221         if (!p->status_func)
2222             continue;
2223         (p->status_func)(p, c);
2224         if (p->stat > 0){
2225             action_flag = 0;
2226         }else if(p->stat == 0){
2227             if (result){
2228                 action_flag = 0;
2229             }else{
2230                 result = p;
2231             }
2232         }
2233         ++p;
2234     }
2235
2236     if (action_flag){
2237         if (result && !estab_f){
2238             set_iconv(TRUE, result->iconv_func);
2239         }else if (c <= DEL){
2240             struct input_code *ptr = input_code_list;
2241             while (ptr->name){
2242                 status_reset(ptr);
2243                 ++ptr;
2244             }
2245         }
2246     }
2247 }
2248
2249 #ifndef WIN32DLL
2250 nkf_char std_getc(FILE *f)
2251 {
2252     if (std_gc_ndx){
2253         return std_gc_buf[--std_gc_ndx];
2254     }
2255     return getc(f);
2256 }
2257 #endif /*WIN32DLL*/
2258
2259 nkf_char std_ungetc(nkf_char c, FILE *f)
2260 {
2261     if (std_gc_ndx == STD_GC_BUFSIZE){
2262         return EOF;
2263     }
2264     std_gc_buf[std_gc_ndx++] = c;
2265     return c;
2266 }
2267
2268 #ifndef WIN32DLL
2269 void std_putc(nkf_char c)
2270 {
2271     if(c!=EOF)
2272       putchar(c);
2273 }
2274 #endif /*WIN32DLL*/
2275
2276 #if !defined(PERL_XS) && !defined(WIN32DLL)
2277 nkf_char noconvert(FILE *f)
2278 {
2279     nkf_char    c;
2280
2281     if (nop_f == 2)
2282         module_connection();
2283     while ((c = (*i_getc)(f)) != EOF)
2284       (*o_putc)(c);
2285     (*o_putc)(EOF);
2286     return 1;
2287 }
2288 #endif
2289
2290 void module_connection(void)
2291 {
2292     oconv = output_conv;
2293     o_putc = std_putc;
2294
2295     /* replace continucation module, from output side */
2296
2297     /* output redicrection */
2298 #ifdef CHECK_OPTION
2299     if (noout_f || guess_f){
2300         o_putc = no_putc;
2301     }
2302 #endif
2303     if (mimeout_f) {
2304         o_mputc = o_putc;
2305         o_putc = mime_putc;
2306         if (mimeout_f == TRUE) {
2307             o_base64conv = oconv; oconv = base64_conv;
2308         }
2309         /* base64_count = 0; */
2310     }
2311
2312     if (nlmode_f || guess_f) {
2313         o_nlconv = oconv; oconv = nl_conv;
2314     }
2315     if (rot_f) {
2316         o_rot_conv = oconv; oconv = rot_conv;
2317     }
2318     if (iso2022jp_f) {
2319         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2320     }
2321     if (hira_f) {
2322         o_hira_conv = oconv; oconv = hira_conv;
2323     }
2324     if (fold_f) {
2325         o_fconv = oconv; oconv = fold_conv;
2326         f_line = 0;
2327     }
2328     if (alpha_f || x0201_f) {
2329         o_zconv = oconv; oconv = z_conv;
2330     }
2331
2332     i_getc = std_getc;
2333     i_ungetc = std_ungetc;
2334     /* input redicrection */
2335 #ifdef INPUT_OPTION
2336     if (cap_f){
2337         i_cgetc = i_getc; i_getc = cap_getc;
2338         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2339     }
2340     if (url_f){
2341         i_ugetc = i_getc; i_getc = url_getc;
2342         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2343     }
2344 #endif
2345 #ifdef NUMCHAR_OPTION
2346     if (numchar_f){
2347         i_ngetc = i_getc; i_getc = numchar_getc;
2348         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2349     }
2350 #endif
2351 #ifdef UNICODE_NORMALIZATION
2352     if (nfc_f && input_f == UTF8_INPUT){
2353         i_nfc_getc = i_getc; i_getc = nfc_getc;
2354         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2355     }
2356 #endif
2357     if (mime_f && mimebuf_f==FIXED_MIME) {
2358         i_mgetc = i_getc; i_getc = mime_getc;
2359         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2360     }
2361     if (broken_f & 1) {
2362         i_bgetc = i_getc; i_getc = broken_getc;
2363         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2364     }
2365     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2366         set_iconv(-TRUE, e_iconv);
2367     } else if (input_f == SJIS_INPUT) {
2368         set_iconv(-TRUE, s_iconv);
2369 #ifdef UTF8_INPUT_ENABLE
2370     } else if (input_f == UTF8_INPUT) {
2371         set_iconv(-TRUE, w_iconv);
2372     } else if (input_f == UTF16_INPUT) {
2373         set_iconv(-TRUE, w_iconv16);
2374     } else if (input_f == UTF32_INPUT) {
2375         set_iconv(-TRUE, w_iconv32);
2376 #endif
2377     } else {
2378         set_iconv(FALSE, e_iconv);
2379     }
2380
2381     {
2382         struct input_code *p = input_code_list;
2383         while (p->name){
2384             status_reinit(p++);
2385         }
2386     }
2387 }
2388
2389 /*
2390  * Check and Ignore BOM
2391  */
2392 void check_bom(FILE *f)
2393 {
2394     int c2;
2395     switch(c2 = (*i_getc)(f)){
2396     case 0x00:
2397         if((c2 = (*i_getc)(f)) == 0x00){
2398             if((c2 = (*i_getc)(f)) == 0xFE){
2399                 if((c2 = (*i_getc)(f)) == 0xFF){
2400                     if(!input_f){
2401                         set_iconv(TRUE, w_iconv32);
2402                     }
2403                     if (iconv == w_iconv32) {
2404                         input_endian = ENDIAN_BIG;
2405                         return;
2406                     }
2407                     (*i_ungetc)(0xFF,f);
2408                 }else (*i_ungetc)(c2,f);
2409                 (*i_ungetc)(0xFE,f);
2410             }else if(c2 == 0xFF){
2411                 if((c2 = (*i_getc)(f)) == 0xFE){
2412                     if(!input_f){
2413                         set_iconv(TRUE, w_iconv32);
2414                     }
2415                     if (iconv == w_iconv32) {
2416                         input_endian = ENDIAN_2143;
2417                         return;
2418                     }
2419                     (*i_ungetc)(0xFF,f);
2420                 }else (*i_ungetc)(c2,f);
2421                 (*i_ungetc)(0xFF,f);
2422             }else (*i_ungetc)(c2,f);
2423             (*i_ungetc)(0x00,f);
2424         }else (*i_ungetc)(c2,f);
2425         (*i_ungetc)(0x00,f);
2426         break;
2427     case 0xEF:
2428         if((c2 = (*i_getc)(f)) == 0xBB){
2429             if((c2 = (*i_getc)(f)) == 0xBF){
2430                 if(!input_f){
2431                     set_iconv(TRUE, w_iconv);
2432                 }
2433                 if (iconv == w_iconv) {
2434                     return;
2435                 }
2436                 (*i_ungetc)(0xBF,f);
2437             }else (*i_ungetc)(c2,f);
2438             (*i_ungetc)(0xBB,f);
2439         }else (*i_ungetc)(c2,f);
2440         (*i_ungetc)(0xEF,f);
2441         break;
2442     case 0xFE:
2443         if((c2 = (*i_getc)(f)) == 0xFF){
2444             if((c2 = (*i_getc)(f)) == 0x00){
2445                 if((c2 = (*i_getc)(f)) == 0x00){
2446                     if(!input_f){
2447                         set_iconv(TRUE, w_iconv32);
2448                     }
2449                     if (iconv == w_iconv32) {
2450                         input_endian = ENDIAN_3412;
2451                         return;
2452                     }
2453                     (*i_ungetc)(0x00,f);
2454                 }else (*i_ungetc)(c2,f);
2455                 (*i_ungetc)(0x00,f);
2456             }else (*i_ungetc)(c2,f);
2457             if(!input_f){
2458                 set_iconv(TRUE, w_iconv16);
2459             }
2460             if (iconv == w_iconv16) {
2461                 input_endian = ENDIAN_BIG;
2462                 return;
2463             }
2464             (*i_ungetc)(0xFF,f);
2465         }else (*i_ungetc)(c2,f);
2466         (*i_ungetc)(0xFE,f);
2467         break;
2468     case 0xFF:
2469         if((c2 = (*i_getc)(f)) == 0xFE){
2470             if((c2 = (*i_getc)(f)) == 0x00){
2471                 if((c2 = (*i_getc)(f)) == 0x00){
2472                     if(!input_f){
2473                         set_iconv(TRUE, w_iconv32);
2474                     }
2475                     if (iconv == w_iconv32) {
2476                         input_endian = ENDIAN_LITTLE;
2477                         return;
2478                     }
2479                     (*i_ungetc)(0x00,f);
2480                 }else (*i_ungetc)(c2,f);
2481                 (*i_ungetc)(0x00,f);
2482             }else (*i_ungetc)(c2,f);
2483             if(!input_f){
2484                 set_iconv(TRUE, w_iconv16);
2485             }
2486             if (iconv == w_iconv16) {
2487                 input_endian = ENDIAN_LITTLE;
2488                 return;
2489             }
2490             (*i_ungetc)(0xFE,f);
2491         }else (*i_ungetc)(c2,f);
2492         (*i_ungetc)(0xFF,f);
2493         break;
2494     default:
2495         (*i_ungetc)(c2,f);
2496         break;
2497     }
2498 }
2499
2500 /*
2501    Conversion main loop. Code detection only.
2502  */
2503
2504 nkf_char kanji_convert(FILE *f)
2505 {
2506     nkf_char    c3, c2=0, c1, c0=0;
2507     int is_8bit = FALSE;
2508
2509     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2510 #ifdef UTF8_INPUT_ENABLE
2511        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2512 #endif
2513       ){
2514         is_8bit = TRUE;
2515     }
2516
2517     input_mode = ASCII;
2518     output_mode = ASCII;
2519     shift_mode = FALSE;
2520
2521 #define NEXT continue      /* no output, get next */
2522 #define SEND ;             /* output c1 and c2, get next */
2523 #define LAST break         /* end of loop, go closing  */
2524
2525     module_connection();
2526     check_bom(f);
2527
2528     while ((c1 = (*i_getc)(f)) != EOF) {
2529 #ifdef INPUT_CODE_FIX
2530         if (!input_f)
2531 #endif
2532             code_status(c1);
2533         if (c2) {
2534             /* second byte */
2535             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2536                 /* in case of 8th bit is on */
2537                 if (!estab_f&&!mime_decode_mode) {
2538                     /* in case of not established yet */
2539                     /* It is still ambiguious */
2540                     if (h_conv(f, c2, c1)==EOF)
2541                         LAST;
2542                     else
2543                         c2 = 0;
2544                     NEXT;
2545                 } else {
2546                     /* in case of already established */
2547                     if (c1 < AT) {
2548                         /* ignore bogus code and not CP5022x UCD */
2549                         c2 = 0;
2550                         NEXT;
2551                     } else {
2552                         SEND;
2553                     }
2554                 }
2555             } else
2556                 /* second byte, 7 bit code */
2557                 /* it might be kanji shitfted */
2558                 if ((c1 == DEL) || (c1 <= SP)) {
2559                     /* ignore bogus first code */
2560                     c2 = 0;
2561                     NEXT;
2562                 } else
2563                     SEND;
2564         } else {
2565             /* first byte */
2566 #ifdef UTF8_INPUT_ENABLE
2567             if (iconv == w_iconv16) {
2568                 if (input_endian == ENDIAN_BIG) {
2569                     c2 = c1;
2570                     if ((c1 = (*i_getc)(f)) != EOF) {
2571                         if (0xD8 <= c2 && c2 <= 0xDB) {
2572                             if ((c0 = (*i_getc)(f)) != EOF) {
2573                                 c0 <<= 8;
2574                                 if ((c3 = (*i_getc)(f)) != EOF) {
2575                                     c0 |= c3;
2576                                 } else c2 = EOF;
2577                             } else c2 = EOF;
2578                         }
2579                     } else c2 = EOF;
2580                 } else {
2581                     if ((c2 = (*i_getc)(f)) != EOF) {
2582                         if (0xD8 <= c2 && c2 <= 0xDB) {
2583                             if ((c3 = (*i_getc)(f)) != EOF) {
2584                                 if ((c0 = (*i_getc)(f)) != EOF) {
2585                                     c0 <<= 8;
2586                                     c0 |= c3;
2587                                 } else c2 = EOF;
2588                             } else c2 = EOF;
2589                         }
2590                     } else c2 = EOF;
2591                 }
2592                 SEND;
2593             } else if(iconv == w_iconv32){
2594                 int c3 = c1;
2595                 if((c2 = (*i_getc)(f)) != EOF &&
2596                    (c1 = (*i_getc)(f)) != EOF &&
2597                    (c0 = (*i_getc)(f)) != EOF){
2598                     switch(input_endian){
2599                     case ENDIAN_BIG:
2600                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2601                         break;
2602                     case ENDIAN_LITTLE:
2603                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2604                         break;
2605                     case ENDIAN_2143:
2606                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2607                         break;
2608                     case ENDIAN_3412:
2609                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2610                         break;
2611                     }
2612                     c2 = 0;
2613                 }else{
2614                     c2 = EOF;
2615                 }
2616                 SEND;
2617             } else
2618 #endif
2619 #ifdef NUMCHAR_OPTION
2620             if (is_unicode_capsule(c1)){
2621                 SEND;
2622             } else
2623 #endif
2624             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2625                 /* 8 bit code */
2626                 if (!estab_f && !iso8859_f) {
2627                     /* not established yet */
2628                     c2 = c1;
2629                     NEXT;
2630                 } else { /* estab_f==TRUE */
2631                     if (iso8859_f) {
2632                         c2 = ISO8859_1;
2633                         c1 &= 0x7f;
2634                         SEND;
2635                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2636                         /* SJIS X0201 Case... */
2637                         if(iso2022jp_f && x0201_f==NO_X0201) {
2638                             (*oconv)(GETA1, GETA2);
2639                             NEXT;
2640                         } else {
2641                             c2 = X0201;
2642                             c1 &= 0x7f;
2643                             SEND;
2644                         }
2645                     } else if (c1==SSO && iconv != s_iconv) {
2646                         /* EUC X0201 Case */
2647                         c1 = (*i_getc)(f);  /* skip SSO */
2648                         code_status(c1);
2649                         if (SSP<=c1 && c1<0xe0) {
2650                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2651                                 (*oconv)(GETA1, GETA2);
2652                                 NEXT;
2653                             } else {
2654                                 c2 = X0201;
2655                                 c1 &= 0x7f;
2656                                 SEND;
2657                             }
2658                         } else  { /* bogus code, skip SSO and one byte */
2659                             NEXT;
2660                         }
2661                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2662                                (c1 == 0xFD || c1 == 0xFE)) {
2663                         /* CP10001 */
2664                         c2 = X0201;
2665                         c1 &= 0x7f;
2666                         SEND;
2667                     } else {
2668                        /* already established */
2669                        c2 = c1;
2670                        NEXT;
2671                     }
2672                 }
2673             } else if ((c1 > SP) && (c1 != DEL)) {
2674                 /* in case of Roman characters */
2675                 if (shift_mode) {
2676                     /* output 1 shifted byte */
2677                     if (iso8859_f) {
2678                         c2 = ISO8859_1;
2679                         SEND;
2680                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2681                       /* output 1 shifted byte */
2682                         if(iso2022jp_f && x0201_f==NO_X0201) {
2683                             (*oconv)(GETA1, GETA2);
2684                             NEXT;
2685                         } else {
2686                             c2 = X0201;
2687                             SEND;
2688                         }
2689                     } else {
2690                         /* look like bogus code */
2691                         NEXT;
2692                     }
2693                 } else if (input_mode == X0208 || input_mode == X0212 ||
2694                            input_mode == X0213_1 || input_mode == X0213_2) {
2695                     /* in case of Kanji shifted */
2696                     c2 = c1;
2697                     NEXT;
2698                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2699                     /* Check MIME code */
2700                     if ((c1 = (*i_getc)(f)) == EOF) {
2701                         (*oconv)(0, '=');
2702                         LAST;
2703                     } else if (c1 == '?') {
2704                         /* =? is mime conversion start sequence */
2705                         if(mime_f == STRICT_MIME) {
2706                             /* check in real detail */
2707                             if (mime_begin_strict(f) == EOF)
2708                                 LAST;
2709                             else
2710                                 NEXT;
2711                         } else if (mime_begin(f) == EOF)
2712                             LAST;
2713                         else
2714                             NEXT;
2715                     } else {
2716                         (*oconv)(0, '=');
2717                         (*i_ungetc)(c1,f);
2718                         NEXT;
2719                     }
2720                 } else {
2721                     /* normal ASCII code */
2722                     SEND;
2723                 }
2724             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2725                 shift_mode = FALSE;
2726                 NEXT;
2727             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2728                 shift_mode = TRUE;
2729                 NEXT;
2730             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2731                 if ((c1 = (*i_getc)(f)) == EOF) {
2732                     /*  (*oconv)(0, ESC); don't send bogus code */
2733                     LAST;
2734                 } else if (c1 == '$') {
2735                     if ((c1 = (*i_getc)(f)) == EOF) {
2736                         /*
2737                         (*oconv)(0, ESC); don't send bogus code
2738                         (*oconv)(0, '$'); */
2739                         LAST;
2740                     } else if (c1 == '@'|| c1 == 'B') {
2741                         /* This is kanji introduction */
2742                         input_mode = X0208;
2743                         shift_mode = FALSE;
2744                         set_input_codename("ISO-2022-JP");
2745 #ifdef CHECK_OPTION
2746                         debug("ISO-2022-JP");
2747 #endif
2748                         NEXT;
2749                     } else if (c1 == '(') {
2750                         if ((c1 = (*i_getc)(f)) == EOF) {
2751                             /* don't send bogus code
2752                             (*oconv)(0, ESC);
2753                             (*oconv)(0, '$');
2754                             (*oconv)(0, '(');
2755                                 */
2756                             LAST;
2757                         } else if (c1 == '@'|| c1 == 'B') {
2758                             /* This is kanji introduction */
2759                             input_mode = X0208;
2760                             shift_mode = FALSE;
2761                             NEXT;
2762 #ifdef X0212_ENABLE
2763                         } else if (c1 == 'D'){
2764                             input_mode = X0212;
2765                             shift_mode = FALSE;
2766                             NEXT;
2767 #endif /* X0212_ENABLE */
2768                         } else if (c1 == (X0213_1&0x7F)){
2769                             input_mode = X0213_1;
2770                             shift_mode = FALSE;
2771                             NEXT;
2772                         } else if (c1 == (X0213_2&0x7F)){
2773                             input_mode = X0213_2;
2774                             shift_mode = FALSE;
2775                             NEXT;
2776                         } else {
2777                             /* could be some special code */
2778                             (*oconv)(0, ESC);
2779                             (*oconv)(0, '$');
2780                             (*oconv)(0, '(');
2781                             (*oconv)(0, c1);
2782                             NEXT;
2783                         }
2784                     } else if (broken_f&0x2) {
2785                         /* accept any ESC-(-x as broken code ... */
2786                         input_mode = X0208;
2787                         shift_mode = FALSE;
2788                         NEXT;
2789                     } else {
2790                         (*oconv)(0, ESC);
2791                         (*oconv)(0, '$');
2792                         (*oconv)(0, c1);
2793                         NEXT;
2794                     }
2795                 } else if (c1 == '(') {
2796                     if ((c1 = (*i_getc)(f)) == EOF) {
2797                         /* don't send bogus code
2798                         (*oconv)(0, ESC);
2799                         (*oconv)(0, '('); */
2800                         LAST;
2801                     } else {
2802                         if (c1 == 'I') {
2803                             /* This is X0201 kana introduction */
2804                             input_mode = X0201; shift_mode = X0201;
2805                             NEXT;
2806                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2807                             /* This is X0208 kanji introduction */
2808                             input_mode = ASCII; shift_mode = FALSE;
2809                             NEXT;
2810                         } else if (broken_f&0x2) {
2811                             input_mode = ASCII; shift_mode = FALSE;
2812                             NEXT;
2813                         } else {
2814                             (*oconv)(0, ESC);
2815                             (*oconv)(0, '(');
2816                             /* maintain various input_mode here */
2817                             SEND;
2818                         }
2819                     }
2820                } else if ( c1 == 'N' || c1 == 'n'){
2821                    /* SS2 */
2822                    c3 = (*i_getc)(f);  /* skip SS2 */
2823                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2824                        c1 = c3;
2825                        c2 = X0201;
2826                        SEND;
2827                    }else{
2828                        (*i_ungetc)(c3, f);
2829                        /* lonely ESC  */
2830                        (*oconv)(0, ESC);
2831                        SEND;
2832                    }
2833                 } else {
2834                     /* lonely ESC  */
2835                     (*oconv)(0, ESC);
2836                     SEND;
2837                 }
2838             } else if (c1 == ESC && iconv == s_iconv) {
2839                 /* ESC in Shift_JIS */
2840                 if ((c1 = (*i_getc)(f)) == EOF) {
2841                     /*  (*oconv)(0, ESC); don't send bogus code */
2842                     LAST;
2843                 } else if (c1 == '$') {
2844                     /* J-PHONE emoji */
2845                     if ((c1 = (*i_getc)(f)) == EOF) {
2846                         /*
2847                            (*oconv)(0, ESC); don't send bogus code
2848                            (*oconv)(0, '$'); */
2849                         LAST;
2850                     } else {
2851                         if (('E' <= c1 && c1 <= 'G') ||
2852                             ('O' <= c1 && c1 <= 'Q')) {
2853                             /*
2854                                NUM : 0 1 2 3 4 5
2855                                BYTE: G E F O P Q
2856                                C%7 : 1 6 0 2 3 4
2857                                C%7 : 0 1 2 3 4 5 6
2858                                NUM : 2 0 3 4 5 X 1
2859                              */
2860                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2861                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2862                             while ((c1 = (*i_getc)(f)) != EOF) {
2863                                 if (SP <= c1 && c1 <= 'z') {
2864                                     (*oconv)(0, c1 + c0);
2865                                 } else break; /* c1 == SO */
2866                             }
2867                         }
2868                     }
2869                     if (c1 == EOF) LAST;
2870                     NEXT;
2871                 } else {
2872                     /* lonely ESC  */
2873                     (*oconv)(0, ESC);
2874                     SEND;
2875                 }
2876             } else if (c1 == LF || c1 == CR) {
2877                 if (broken_f&4) {
2878                     input_mode = ASCII; set_iconv(FALSE, 0);
2879                     SEND;
2880                 } else if (mime_decode_f && !mime_decode_mode){
2881                     if (c1 == LF) {
2882                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2883                             i_ungetc(SP,f);
2884                             continue;
2885                         } else {
2886                             i_ungetc(c1,f);
2887                         }
2888                         c1 = LF;
2889                         SEND;
2890                     } else  { /* if (c1 == CR)*/
2891                         if ((c1=(*i_getc)(f))!=EOF) {
2892                             if (c1==SP) {
2893                                 i_ungetc(SP,f);
2894                                 continue;
2895                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2896                                 i_ungetc(SP,f);
2897                                 continue;
2898                             } else {
2899                                 i_ungetc(c1,f);
2900                             }
2901                             i_ungetc(LF,f);
2902                         } else {
2903                             i_ungetc(c1,f);
2904                         }
2905                         c1 = CR;
2906                         SEND;
2907                     }
2908                 }
2909             } else if (c1 == DEL && input_mode == X0208) {
2910                 /* CP5022x */
2911                 c2 = c1;
2912                 NEXT;
2913             } else
2914                 SEND;
2915         }
2916         /* send: */
2917         switch(input_mode){
2918         case ASCII:
2919             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2920             case -2:
2921                 /* 4 bytes UTF-8 */
2922                 if ((c0 = (*i_getc)(f)) != EOF) {
2923                     code_status(c0);
2924                     c0 <<= 8;
2925                     if ((c3 = (*i_getc)(f)) != EOF) {
2926                         code_status(c3);
2927                         (*iconv)(c2, c1, c0|c3);
2928                     }
2929                 }
2930                 break;
2931             case -1:
2932                 /* 3 bytes EUC or UTF-8 */
2933                 if ((c0 = (*i_getc)(f)) != EOF) {
2934                     code_status(c0);
2935                     (*iconv)(c2, c1, c0);
2936                 }
2937                 break;
2938             }
2939             break;
2940         case X0208:
2941         case X0213_1:
2942             if (ms_ucs_map_f &&
2943                 0x7F <= c2 && c2 <= 0x92 &&
2944                 0x21 <= c1 && c1 <= 0x7E) {
2945                 /* CP932 UDC */
2946                 if(c1 == 0x7F) return 0;
2947                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2948                 c2 = 0;
2949             }
2950             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2951             break;
2952 #ifdef X0212_ENABLE
2953         case X0212:
2954             (*oconv)(PREFIX_EUCG3 | c2, c1);
2955             break;
2956 #endif /* X0212_ENABLE */
2957         case X0213_2:
2958             (*oconv)(PREFIX_EUCG3 | c2, c1);
2959             break;
2960         default:
2961             (*oconv)(input_mode, c1);  /* other special case */
2962         }
2963
2964         c2 = 0;
2965         c0 = 0;
2966         continue;
2967         /* goto next_word */
2968     }
2969
2970     /* epilogue */
2971     (*iconv)(EOF, 0, 0);
2972     if (!input_codename)
2973     {
2974         if (is_8bit) {
2975             struct input_code *p = input_code_list;
2976             struct input_code *result = p;
2977             while (p->name){
2978                 if (p->score < result->score) result = p;
2979                 ++p;
2980             }
2981             set_input_codename(result->name);
2982 #ifdef CHECK_OPTION
2983             debug(result->name);
2984 #endif
2985         }
2986     }
2987     return 1;
2988 }
2989
2990 nkf_char
2991 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2992 {
2993     nkf_char ret, c3, c0;
2994     int hold_index;
2995
2996
2997     /** it must NOT be in the kanji shifte sequence      */
2998     /** it must NOT be written in JIS7                   */
2999     /** and it must be after 2 byte 8bit code            */
3000
3001     hold_count = 0;
3002     push_hold_buf(c2);
3003     push_hold_buf(c1);
3004
3005     while ((c1 = (*i_getc)(f)) != EOF) {
3006         if (c1 == ESC){
3007             (*i_ungetc)(c1,f);
3008             break;
3009         }
3010         code_status(c1);
3011         if (push_hold_buf(c1) == EOF || estab_f){
3012             break;
3013         }
3014     }
3015
3016     if (!estab_f){
3017         struct input_code *p = input_code_list;
3018         struct input_code *result = p;
3019         if (c1 == EOF){
3020             code_status(c1);
3021         }
3022         while (p->name){
3023             if (p->status_func && p->score < result->score){
3024                 result = p;
3025             }
3026             ++p;
3027         }
3028         set_iconv(TRUE, result->iconv_func);
3029     }
3030
3031
3032     /** now,
3033      ** 1) EOF is detected, or
3034      ** 2) Code is established, or
3035      ** 3) Buffer is FULL (but last word is pushed)
3036      **
3037      ** in 1) and 3) cases, we continue to use
3038      ** Kanji codes by oconv and leave estab_f unchanged.
3039      **/
3040
3041     ret = c1;
3042     hold_index = 0;
3043     while (hold_index < hold_count){
3044         c2 = hold_buf[hold_index++];
3045         if (c2 <= DEL
3046 #ifdef NUMCHAR_OPTION
3047             || is_unicode_capsule(c2)
3048 #endif
3049             ){
3050             (*iconv)(0, c2, 0);
3051             continue;
3052         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3053             (*iconv)(X0201, c2, 0);
3054             continue;
3055         }
3056         if (hold_index < hold_count){
3057             c1 = hold_buf[hold_index++];
3058         }else{
3059             c1 = (*i_getc)(f);
3060             if (c1 == EOF){
3061                 c3 = EOF;
3062                 break;
3063             }
3064             code_status(c1);
3065         }
3066         c0 = 0;
3067         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3068         case -2:
3069             /* 4 bytes UTF-8 */
3070             if (hold_index < hold_count){
3071                 c0 = hold_buf[hold_index++];
3072             } else if ((c0 = (*i_getc)(f)) == EOF) {
3073                 ret = EOF;
3074                 break;
3075             } else {
3076                 code_status(c0);
3077                 c0 <<= 8;
3078                 if (hold_index < hold_count){
3079                     c3 = hold_buf[hold_index++];
3080                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3081                     c0 = ret = EOF;
3082                     break;
3083                 } else {
3084                     code_status(c3);
3085                     (*iconv)(c2, c1, c0|c3);
3086                 }
3087             }
3088             break;
3089         case -1:
3090             /* 3 bytes EUC or UTF-8 */
3091             if (hold_index < hold_count){
3092                 c0 = hold_buf[hold_index++];
3093             } else if ((c0 = (*i_getc)(f)) == EOF) {
3094                 ret = EOF;
3095                 break;
3096             } else {
3097                 code_status(c0);
3098             }
3099             (*iconv)(c2, c1, c0);
3100             break;
3101         }
3102         if (c0 == EOF) break;
3103     }
3104     return ret;
3105 }
3106
3107 nkf_char push_hold_buf(nkf_char c2)
3108 {
3109     if (hold_count >= HOLD_SIZE*2)
3110         return (EOF);
3111     hold_buf[hold_count++] = (unsigned char)c2;
3112     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3113 }
3114
3115 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3116 {
3117 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3118     nkf_char val;
3119 #endif
3120     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3121 #ifdef SHIFTJIS_CP932
3122     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3123         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3124         if (val){
3125             c2 = val >> 8;
3126             c1 = val & 0xff;
3127         }
3128     }
3129     if (cp932inv_f
3130         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3131         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3132         if (c){
3133             c2 = c >> 8;
3134             c1 = c & 0xff;
3135         }
3136     }
3137 #endif /* SHIFTJIS_CP932 */
3138 #ifdef X0212_ENABLE
3139     if (!x0213_f && is_ibmext_in_sjis(c2)){
3140         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3141         if (val){
3142             if (val > 0x7FFF){
3143                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3144                 c1 = val & 0xff;
3145             }else{
3146                 c2 = val >> 8;
3147                 c1 = val & 0xff;
3148             }
3149             if (p2) *p2 = c2;
3150             if (p1) *p1 = c1;
3151             return 0;
3152         }
3153     }
3154 #endif
3155     if(c2 >= 0x80){
3156         if(x0213_f && c2 >= 0xF0){
3157             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3158                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3159             }else{ /* 78<=k<=94 */
3160                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3161                 if (0x9E < c1) c2++;
3162             }
3163         }else{
3164             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3165             if (0x9E < c1) c2++;
3166         }
3167         if (c1 < 0x9F)
3168             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3169         else {
3170             c1 = c1 - 0x7E;
3171         }
3172     }
3173
3174 #ifdef X0212_ENABLE
3175     c2 = x0212_unshift(c2);
3176 #endif
3177     if (p2) *p2 = c2;
3178     if (p1) *p1 = c1;
3179     return 0;
3180 }
3181
3182 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3183 {
3184     if (c2 == X0201) {
3185         c1 &= 0x7f;
3186     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3187         /* NOP */
3188     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3189         /* CP932 UDC */
3190         if(c1 == 0x7F) return 0;
3191         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3192         c2 = 0;
3193     } else {
3194         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3195         if (ret) return ret;
3196     }
3197     (*oconv)(c2, c1);
3198     return 0;
3199 }
3200
3201 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3202 {
3203     if (c2 == X0201) {
3204         c1 &= 0x7f;
3205 #ifdef X0212_ENABLE
3206     }else if (c2 == 0x8f){
3207         if (c0 == 0){
3208             return -1;
3209         }
3210         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3211             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3212             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3213             c2 = 0;
3214         } else {
3215             c2 = (c2 << 8) | (c1 & 0x7f);
3216             c1 = c0 & 0x7f;
3217 #ifdef SHIFTJIS_CP932
3218             if (cp51932_f){
3219                 nkf_char s2, s1;
3220                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3221                     s2e_conv(s2, s1, &c2, &c1);
3222                     if (c2 < 0x100){
3223                         c1 &= 0x7f;
3224                         c2 &= 0x7f;
3225                     }
3226                 }
3227             }
3228 #endif /* SHIFTJIS_CP932 */
3229         }
3230 #endif /* X0212_ENABLE */
3231     } else if (c2 == SSO){
3232         c2 = X0201;
3233         c1 &= 0x7f;
3234     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3235         /* NOP */
3236     } else {
3237         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3238             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3239             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3240             c2 = 0;
3241         } else {
3242             c1 &= 0x7f;
3243             c2 &= 0x7f;
3244 #ifdef SHIFTJIS_CP932
3245             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3246                 nkf_char s2, s1;
3247                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3248                     s2e_conv(s2, s1, &c2, &c1);
3249                     if (c2 < 0x100){
3250                         c1 &= 0x7f;
3251                         c2 &= 0x7f;
3252                     }
3253                 }
3254             }
3255 #endif /* SHIFTJIS_CP932 */
3256         }
3257     }
3258     (*oconv)(c2, c1);
3259     return 0;
3260 }
3261
3262 #ifdef UTF8_INPUT_ENABLE
3263 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3264 {
3265     nkf_char ret = 0;
3266
3267     if (!c1){
3268         *p2 = 0;
3269         *p1 = c2;
3270     }else if (0xc0 <= c2 && c2 <= 0xef) {
3271         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3272 #ifdef NUMCHAR_OPTION
3273         if (ret > 0){
3274             if (p2) *p2 = 0;
3275             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3276             ret = 0;
3277         }
3278 #endif
3279     }
3280     return ret;
3281 }
3282
3283 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3284 {
3285     nkf_char ret = 0;
3286     static const char w_iconv_utf8_1st_byte[] =
3287     { /* 0xC0 - 0xFF */
3288         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3289         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3290         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3291         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3292
3293     if (c2 < 0 || 0xff < c2) {
3294     }else if (c2 == 0) { /* 0 : 1 byte*/
3295         c0 = 0;
3296     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3297         return 0;
3298     } else{
3299         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3300         case 21:
3301             if (c1 < 0x80 || 0xBF < c1) return 0;
3302             break;
3303         case 30:
3304             if (c0 == 0) return -1;
3305             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3306                 return 0;
3307             break;
3308         case 31:
3309         case 33:
3310             if (c0 == 0) return -1;
3311             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3312                 return 0;
3313             break;
3314         case 32:
3315             if (c0 == 0) return -1;
3316             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3317                 return 0;
3318             break;
3319         case 40:
3320             if (c0 == 0) return -2;
3321             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3322                 return 0;
3323             break;
3324         case 41:
3325             if (c0 == 0) return -2;
3326             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3327                 return 0;
3328             break;
3329         case 42:
3330             if (c0 == 0) return -2;
3331             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3332                 return 0;
3333             break;
3334         default:
3335             return 0;
3336             break;
3337         }
3338     }
3339     if (c2 == 0 || c2 == EOF){
3340     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3341         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3342         c2 = 0;
3343     } else {
3344         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3345     }
3346     if (ret == 0){
3347         (*oconv)(c2, c1);
3348     }
3349     return ret;
3350 }
3351 #endif
3352
3353 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3354 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3355 {
3356     val &= VALUE_MASK;
3357     if (val < 0x80){
3358         *p2 = val;
3359         *p1 = 0;
3360         *p0 = 0;
3361     }else if (val < 0x800){
3362         *p2 = 0xc0 | (val >> 6);
3363         *p1 = 0x80 | (val & 0x3f);
3364         *p0 = 0;
3365     } else if (val <= NKF_INT32_C(0xFFFF)) {
3366         *p2 = 0xe0 | (val >> 12);
3367         *p1 = 0x80 | ((val >> 6) & 0x3f);
3368         *p0 = 0x80 | (val        & 0x3f);
3369     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3370         *p2 = 0xe0 |  (val >> 16);
3371         *p1 = 0x80 | ((val >> 12) & 0x3f);
3372         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3373     } else {
3374         *p2 = 0;
3375         *p1 = 0;
3376         *p0 = 0;
3377     }
3378 }
3379 #endif
3380
3381 #ifdef UTF8_INPUT_ENABLE
3382 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3383 {
3384     nkf_char val;
3385     if (c2 >= 0xf8) {
3386         val = -1;
3387     } else if (c2 >= 0xf0){
3388         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3389         val = (c2 & 0x0f) << 18;
3390         val |= (c1 & 0x3f) << 12;
3391         val |= (c0 & 0x3f00) >> 2;
3392         val |= (c0 & 0x3f);
3393     }else if (c2 >= 0xe0){
3394         val = (c2 & 0x0f) << 12;
3395         val |= (c1 & 0x3f) << 6;
3396         val |= (c0 & 0x3f);
3397     }else if (c2 >= 0xc0){
3398         val = (c2 & 0x1f) << 6;
3399         val |= (c1 & 0x3f);
3400     }else{
3401         val = c2;
3402     }
3403     return val;
3404 }
3405
3406 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3407 {
3408     nkf_char c2, c1, c0;
3409     nkf_char ret = 0;
3410     val &= VALUE_MASK;
3411     if (val < 0x80){
3412         *p2 = 0;
3413         *p1 = val;
3414     }else{
3415         w16w_conv(val, &c2, &c1, &c0);
3416         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3417 #ifdef NUMCHAR_OPTION
3418         if (ret > 0){
3419             *p2 = 0;
3420             *p1 = CLASS_UNICODE | val;
3421             ret = 0;
3422         }
3423 #endif
3424     }
3425     return ret;
3426 }
3427 #endif
3428
3429 #ifdef UTF8_INPUT_ENABLE
3430 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3431 {
3432     nkf_char ret = 0;
3433     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3434         (*oconv)(c2, c1);
3435         return 0;
3436     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3437         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3438             return -2;
3439         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3440         c2 = 0;
3441     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3442         /*
3443            return 2;
3444         */
3445         return 1;
3446     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3447     if (ret) return ret;
3448     (*oconv)(c2, c1);
3449     return 0;
3450 }
3451
3452 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3453 {
3454     int ret = 0;
3455
3456     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3457     } else if (is_unicode_bmp(c1)) {
3458         ret = w16e_conv(c1, &c2, &c1);
3459     } else {
3460         c2 = 0;
3461         c1 =  CLASS_UNICODE | c1;
3462     }
3463     if (ret) return ret;
3464     (*oconv)(c2, c1);
3465     return 0;
3466 }
3467
3468 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3469 {
3470     const unsigned short *const *pp;
3471     const unsigned short *const *const *ppp;
3472     static const char no_best_fit_chars_table_C2[] =
3473     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3474         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3475         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3476         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3477     static const char no_best_fit_chars_table_C2_ms[] =
3478     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3479         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3480         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3481         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3482     static const char no_best_fit_chars_table_932_C2[] =
3483     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3484         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3485         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3486         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3487     static const char no_best_fit_chars_table_932_C3[] =
3488     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3489         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3490         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3491         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3492     nkf_char ret = 0;
3493
3494     if(c2 < 0x80){
3495         *p2 = 0;
3496         *p1 = c2;
3497     }else if(c2 < 0xe0){
3498         if(no_best_fit_chars_f){
3499             if(ms_ucs_map_f == UCS_MAP_CP932){
3500                 switch(c2){
3501                 case 0xC2:
3502                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3503                     break;
3504                 case 0xC3:
3505                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3506                     break;
3507                 }
3508             }else if(!cp932inv_f){
3509                 switch(c2){
3510                 case 0xC2:
3511                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3512                     break;
3513                 case 0xC3:
3514                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3515                     break;
3516                 }
3517             }else if(ms_ucs_map_f == UCS_MAP_MS){
3518                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3519             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3520                 switch(c2){
3521                 case 0xC2:
3522                     switch(c1){
3523                     case 0xA2:
3524                     case 0xA3:
3525                     case 0xA5:
3526                     case 0xA6:
3527                     case 0xAC:
3528                     case 0xAF:
3529                     case 0xB8:
3530                         return 1;
3531                     }
3532                     break;
3533                 }
3534             }
3535         }
3536         pp =
3537             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3538             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3539             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3540             utf8_to_euc_2bytes;
3541         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3542     }else if(c0 < 0xF0){
3543         if(no_best_fit_chars_f){
3544             if(ms_ucs_map_f == UCS_MAP_CP932){
3545                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3546             }else if(ms_ucs_map_f == UCS_MAP_MS){
3547                 switch(c2){
3548                 case 0xE2:
3549                     switch(c1){
3550                     case 0x80:
3551                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3552                         break;
3553                     case 0x88:
3554                         if(c0 == 0x92) return 1;
3555                         break;
3556                     }
3557                     break;
3558                 case 0xE3:
3559                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3560                     break;
3561                 }
3562             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3563                 switch(c2){
3564                 case 0xE3:
3565                     switch(c1){
3566                     case 0x82:
3567                             if(c0 == 0x94) return 1;
3568                         break;
3569                     case 0x83:
3570                             if(c0 == 0xBB) return 1;
3571                         break;
3572                     }
3573                     break;
3574                 }
3575             }else{
3576                 switch(c2){
3577                 case 0xE2:
3578                     switch(c1){
3579                     case 0x80:
3580                         if(c0 == 0x95) return 1;
3581                         break;
3582                     case 0x88:
3583                         if(c0 == 0xA5) return 1;
3584                         break;
3585                     }
3586                     break;
3587                 case 0xEF:
3588                     switch(c1){
3589                     case 0xBC:
3590                         if(c0 == 0x8D) return 1;
3591                         break;
3592                     case 0xBD:
3593                         if(c0 == 0x9E && !cp932inv_f) return 1;
3594                         break;
3595                     case 0xBF:
3596                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3597                         break;
3598                     }
3599                     break;
3600                 }
3601             }
3602         }
3603         ppp =
3604             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3605             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3606             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3607             utf8_to_euc_3bytes;
3608         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3609     }else return -1;
3610 #ifdef SHIFTJIS_CP932
3611     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3612         nkf_char s2, s1;
3613         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3614             s2e_conv(s2, s1, p2, p1);
3615         }else{
3616             ret = 1;
3617         }
3618     }
3619 #endif
3620     return ret;
3621 }
3622
3623 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3624 {
3625     nkf_char c2;
3626     const unsigned short *p;
3627     unsigned short val;
3628
3629     if (pp == 0) return 1;
3630
3631     c1 -= 0x80;
3632     if (c1 < 0 || psize <= c1) return 1;
3633     p = pp[c1];
3634     if (p == 0)  return 1;
3635
3636     c0 -= 0x80;
3637     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3638     val = p[c0];
3639     if (val == 0) return 1;
3640     if (no_cp932ext_f && (
3641         (val>>8) == 0x2D || /* NEC special characters */
3642         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3643         )) return 1;
3644
3645     c2 = val >> 8;
3646    if (val > 0x7FFF){
3647         c2 &= 0x7f;
3648         c2 |= PREFIX_EUCG3;
3649     }
3650     if (c2 == SO) c2 = X0201;
3651     c1 = val & 0x7f;
3652     if (p2) *p2 = c2;
3653     if (p1) *p1 = c1;
3654     return 0;
3655 }
3656
3657 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3658 {
3659     int shift = 20;
3660     c &= VALUE_MASK;
3661     while(shift >= 0){
3662         if(c >= 1<<shift){
3663             while(shift >= 0){
3664                 (*f)(0, bin2hex(c>>shift));
3665                 shift -= 4;
3666             }
3667         }else{
3668             shift -= 4;
3669         }
3670     }
3671     return;
3672 }
3673
3674 void encode_fallback_html(nkf_char c)
3675 {
3676     (*oconv)(0, '&');
3677     (*oconv)(0, '#');
3678     c &= VALUE_MASK;
3679     if(c >= NKF_INT32_C(1000000))
3680         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3681     if(c >= NKF_INT32_C(100000))
3682         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3683     if(c >= 10000)
3684         (*oconv)(0, 0x30+(c/10000  )%10);
3685     if(c >= 1000)
3686         (*oconv)(0, 0x30+(c/1000   )%10);
3687     if(c >= 100)
3688         (*oconv)(0, 0x30+(c/100    )%10);
3689     if(c >= 10)
3690         (*oconv)(0, 0x30+(c/10     )%10);
3691     if(c >= 0)
3692         (*oconv)(0, 0x30+ c         %10);
3693     (*oconv)(0, ';');
3694     return;
3695 }
3696
3697 void encode_fallback_xml(nkf_char c)
3698 {
3699     (*oconv)(0, '&');
3700     (*oconv)(0, '#');
3701     (*oconv)(0, 'x');
3702     nkf_each_char_to_hex(oconv, c);
3703     (*oconv)(0, ';');
3704     return;
3705 }
3706
3707 void encode_fallback_java(nkf_char c)
3708 {
3709     (*oconv)(0, '\\');
3710     c &= VALUE_MASK;
3711     if(!is_unicode_bmp(c)){
3712         (*oconv)(0, 'U');
3713         (*oconv)(0, '0');
3714         (*oconv)(0, '0');
3715         (*oconv)(0, bin2hex(c>>20));
3716         (*oconv)(0, bin2hex(c>>16));
3717     }else{
3718         (*oconv)(0, 'u');
3719     }
3720     (*oconv)(0, bin2hex(c>>12));
3721     (*oconv)(0, bin2hex(c>> 8));
3722     (*oconv)(0, bin2hex(c>> 4));
3723     (*oconv)(0, bin2hex(c    ));
3724     return;
3725 }
3726
3727 void encode_fallback_perl(nkf_char c)
3728 {
3729     (*oconv)(0, '\\');
3730     (*oconv)(0, 'x');
3731     (*oconv)(0, '{');
3732     nkf_each_char_to_hex(oconv, c);
3733     (*oconv)(0, '}');
3734     return;
3735 }
3736
3737 void encode_fallback_subchar(nkf_char c)
3738 {
3739     c = unicode_subchar;
3740     (*oconv)((c>>8)&0xFF, c&0xFF);
3741     return;
3742 }
3743 #endif
3744
3745 #ifdef UTF8_OUTPUT_ENABLE
3746 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3747 {
3748     const unsigned short *p;
3749
3750     if (c2 == X0201) {
3751         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3752             switch (c1) {
3753             case 0x20:
3754                 return 0xA0;
3755             case 0x7D:
3756                 return 0xA9;
3757             }
3758         }
3759         p = euc_to_utf8_1byte;
3760 #ifdef X0212_ENABLE
3761     } else if (is_eucg3(c2)){
3762         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3763             return 0xA6;
3764         }
3765         c2 = (c2&0x7f) - 0x21;
3766         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3767             p = x0212_to_utf8_2bytes[c2];
3768         else
3769             return 0;
3770 #endif
3771     } else {
3772         c2 &= 0x7f;
3773         c2 = (c2&0x7f) - 0x21;
3774         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3775             p =
3776                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3777                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3778                 euc_to_utf8_2bytes_ms[c2];
3779         else
3780             return 0;
3781     }
3782     if (!p) return 0;
3783     c1 = (c1 & 0x7f) - 0x21;
3784     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3785         return p[c1];
3786     return 0;
3787 }
3788
3789 void w_oconv(nkf_char c2, nkf_char c1)
3790 {
3791     nkf_char c0;
3792     nkf_char val;
3793
3794     if (output_bom_f) {
3795         output_bom_f = FALSE;
3796         (*o_putc)('\357');
3797         (*o_putc)('\273');
3798         (*o_putc)('\277');
3799     }
3800
3801     if (c2 == EOF) {
3802         (*o_putc)(EOF);
3803         return;
3804     }
3805
3806 #ifdef NUMCHAR_OPTION
3807     if (c2 == 0 && is_unicode_capsule(c1)){
3808         val = c1 & VALUE_MASK;
3809         if (val < 0x80){
3810             (*o_putc)(val);
3811         }else if (val < 0x800){
3812             (*o_putc)(0xC0 | (val >> 6));
3813             (*o_putc)(0x80 | (val & 0x3f));
3814         } else if (val <= NKF_INT32_C(0xFFFF)) {
3815             (*o_putc)(0xE0 | (val >> 12));
3816             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3817             (*o_putc)(0x80 | (val        & 0x3f));
3818         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3819             (*o_putc)(0xF0 | ( val>>18));
3820             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3821             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3822             (*o_putc)(0x80 | ( val      & 0x3f));
3823         }
3824         return;
3825     }
3826 #endif
3827
3828     if (c2 == 0) {
3829         output_mode = ASCII;
3830         (*o_putc)(c1);
3831     } else if (c2 == ISO8859_1) {
3832         output_mode = ISO8859_1;
3833         (*o_putc)(c1 | 0x080);
3834     } else {
3835         output_mode = UTF8;
3836         val = e2w_conv(c2, c1);
3837         if (val){
3838             w16w_conv(val, &c2, &c1, &c0);
3839             (*o_putc)(c2);
3840             if (c1){
3841                 (*o_putc)(c1);
3842                 if (c0) (*o_putc)(c0);
3843             }
3844         }
3845     }
3846 }
3847
3848 void w_oconv16(nkf_char c2, nkf_char c1)
3849 {
3850     if (output_bom_f) {
3851         output_bom_f = FALSE;
3852         if (output_endian == ENDIAN_LITTLE){
3853             (*o_putc)((unsigned char)'\377');
3854             (*o_putc)('\376');
3855         }else{
3856             (*o_putc)('\376');
3857             (*o_putc)((unsigned char)'\377');
3858         }
3859     }
3860
3861     if (c2 == EOF) {
3862         (*o_putc)(EOF);
3863         return;
3864     }
3865
3866     if (c2 == ISO8859_1) {
3867         c2 = 0;
3868         c1 |= 0x80;
3869 #ifdef NUMCHAR_OPTION
3870     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3871         if (is_unicode_bmp(c1)) {
3872             c2 = (c1 >> 8) & 0xff;
3873             c1 &= 0xff;
3874         } else {
3875             c1 &= VALUE_MASK;
3876             if (c1 <= UNICODE_MAX) {
3877                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3878                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3879                 if (output_endian == ENDIAN_LITTLE){
3880                     (*o_putc)(c2 & 0xff);
3881                     (*o_putc)((c2 >> 8) & 0xff);
3882                     (*o_putc)(c1 & 0xff);
3883                     (*o_putc)((c1 >> 8) & 0xff);
3884                 }else{
3885                     (*o_putc)((c2 >> 8) & 0xff);
3886                     (*o_putc)(c2 & 0xff);
3887                     (*o_putc)((c1 >> 8) & 0xff);
3888                     (*o_putc)(c1 & 0xff);
3889                 }
3890             }
3891             return;
3892         }
3893 #endif
3894     } else if (c2) {
3895         nkf_char val = e2w_conv(c2, c1);
3896         c2 = (val >> 8) & 0xff;
3897         c1 = val & 0xff;
3898         if (!val) return;
3899     }
3900     if (output_endian == ENDIAN_LITTLE){
3901         (*o_putc)(c1);
3902         (*o_putc)(c2);
3903     }else{
3904         (*o_putc)(c2);
3905         (*o_putc)(c1);
3906     }
3907 }
3908
3909 void w_oconv32(nkf_char c2, nkf_char c1)
3910 {
3911     if (output_bom_f) {
3912         output_bom_f = FALSE;
3913         if (output_endian == ENDIAN_LITTLE){
3914             (*o_putc)((unsigned char)'\377');
3915             (*o_putc)('\376');
3916             (*o_putc)('\000');
3917             (*o_putc)('\000');
3918         }else{
3919             (*o_putc)('\000');
3920             (*o_putc)('\000');
3921             (*o_putc)('\376');
3922             (*o_putc)((unsigned char)'\377');
3923         }
3924     }
3925
3926     if (c2 == EOF) {
3927         (*o_putc)(EOF);
3928         return;
3929     }
3930
3931     if (c2 == ISO8859_1) {
3932         c1 |= 0x80;
3933 #ifdef NUMCHAR_OPTION
3934     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3935         c1 &= VALUE_MASK;
3936 #endif
3937     } else if (c2) {
3938         c1 = e2w_conv(c2, c1);
3939         if (!c1) return;
3940     }
3941     if (output_endian == ENDIAN_LITTLE){
3942         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3943         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3944         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3945         (*o_putc)('\000');
3946     }else{
3947         (*o_putc)('\000');
3948         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3949         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3950         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3951     }
3952 }
3953 #endif
3954
3955 void e_oconv(nkf_char c2, nkf_char c1)
3956 {
3957 #ifdef NUMCHAR_OPTION
3958     if (c2 == 0 && is_unicode_capsule(c1)){
3959         w16e_conv(c1, &c2, &c1);
3960         if (c2 == 0 && is_unicode_capsule(c1)){
3961             c2 = c1 & VALUE_MASK;
3962             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
3963                 /* eucJP-ms UDC */
3964                 c1 &= 0xFFF;
3965                 c2 = c1 / 94;
3966                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
3967                 c1 = 0x21 + c1 % 94;
3968                 if (is_eucg3(c2)){
3969                     (*o_putc)(0x8f);
3970                     (*o_putc)((c2 & 0x7f) | 0x080);
3971                     (*o_putc)(c1 | 0x080);
3972                 }else{
3973                     (*o_putc)((c2 & 0x7f) | 0x080);
3974                     (*o_putc)(c1 | 0x080);
3975                 }
3976                 return;
3977             } else {
3978                 if (encode_fallback) (*encode_fallback)(c1);
3979                 return;
3980             }
3981         }
3982     }
3983 #endif
3984     if (c2 == EOF) {
3985         (*o_putc)(EOF);
3986         return;
3987     } else if (c2 == 0) {
3988         output_mode = ASCII;
3989         (*o_putc)(c1);
3990     } else if (c2 == X0201) {
3991         output_mode = JAPANESE_EUC;
3992         (*o_putc)(SSO); (*o_putc)(c1|0x80);
3993     } else if (c2 == ISO8859_1) {
3994         output_mode = ISO8859_1;
3995         (*o_putc)(c1 | 0x080);
3996 #ifdef X0212_ENABLE
3997     } else if (is_eucg3(c2)){
3998         output_mode = JAPANESE_EUC;
3999 #ifdef SHIFTJIS_CP932
4000         if (!cp932inv_f){
4001             nkf_char s2, s1;
4002             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4003                 s2e_conv(s2, s1, &c2, &c1);
4004             }
4005         }
4006 #endif
4007         if (c2 == 0) {
4008             output_mode = ASCII;
4009             (*o_putc)(c1);
4010         }else if (is_eucg3(c2)){
4011             if (x0212_f){
4012                 (*o_putc)(0x8f);
4013                 (*o_putc)((c2 & 0x7f) | 0x080);
4014                 (*o_putc)(c1 | 0x080);
4015             }
4016         }else{
4017             (*o_putc)((c2 & 0x7f) | 0x080);
4018             (*o_putc)(c1 | 0x080);
4019         }
4020 #endif
4021     } else {
4022         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4023             set_iconv(FALSE, 0);
4024             return; /* too late to rescue this char */
4025         }
4026         output_mode = JAPANESE_EUC;
4027         (*o_putc)(c2 | 0x080);
4028         (*o_putc)(c1 | 0x080);
4029     }
4030 }
4031
4032 #ifdef X0212_ENABLE
4033 nkf_char x0212_shift(nkf_char c)
4034 {
4035     nkf_char ret = c;
4036     c &= 0x7f;
4037     if (is_eucg3(ret)){
4038         if (0x75 <= c && c <= 0x7f){
4039             ret = c + (0x109 - 0x75);
4040         }
4041     }else{
4042         if (0x75 <= c && c <= 0x7f){
4043             ret = c + (0x113 - 0x75);
4044         }
4045     }
4046     return ret;
4047 }
4048
4049
4050 nkf_char x0212_unshift(nkf_char c)
4051 {
4052     nkf_char ret = c;
4053     if (0x7f <= c && c <= 0x88){
4054         ret = c + (0x75 - 0x7f);
4055     }else if (0x89 <= c && c <= 0x92){
4056         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4057     }
4058     return ret;
4059 }
4060 #endif /* X0212_ENABLE */
4061
4062 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4063 {
4064     nkf_char ndx;
4065     if (is_eucg3(c2)){
4066         ndx = c2 & 0x7f;
4067         if (x0213_f){
4068             if((0x21 <= ndx && ndx <= 0x2F)){
4069                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4070                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4071                 return 0;
4072             }else if(0x6E <= ndx && ndx <= 0x7E){
4073                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4074                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4075                 return 0;
4076             }
4077             return 1;
4078         }
4079 #ifdef X0212_ENABLE
4080         else if(nkf_isgraph(ndx)){
4081             nkf_char val = 0;
4082             const unsigned short *ptr;
4083             ptr = x0212_shiftjis[ndx - 0x21];
4084             if (ptr){
4085                 val = ptr[(c1 & 0x7f) - 0x21];
4086             }
4087             if (val){
4088                 c2 = val >> 8;
4089                 c1 = val & 0xff;
4090                 if (p2) *p2 = c2;
4091                 if (p1) *p1 = c1;
4092                 return 0;
4093             }
4094             c2 = x0212_shift(c2);
4095         }
4096 #endif /* X0212_ENABLE */
4097     }
4098     if(0x7F < c2) return 1;
4099     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4100     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4101     return 0;
4102 }
4103
4104 void s_oconv(nkf_char c2, nkf_char c1)
4105 {
4106 #ifdef NUMCHAR_OPTION
4107     if (c2 == 0 && is_unicode_capsule(c1)){
4108         w16e_conv(c1, &c2, &c1);
4109         if (c2 == 0 && is_unicode_capsule(c1)){
4110             c2 = c1 & VALUE_MASK;
4111             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4112                 /* CP932 UDC */
4113                 c1 &= 0xFFF;
4114                 c2 = c1 / 188 + 0xF0;
4115                 c1 = c1 % 188;
4116                 c1 += 0x40 + (c1 > 0x3e);
4117                 (*o_putc)(c2);
4118                 (*o_putc)(c1);
4119                 return;
4120             } else {
4121                 if(encode_fallback)(*encode_fallback)(c1);
4122                 return;
4123             }
4124         }
4125     }
4126 #endif
4127     if (c2 == EOF) {
4128         (*o_putc)(EOF);
4129         return;
4130     } else if (c2 == 0) {
4131         output_mode = ASCII;
4132         (*o_putc)(c1);
4133     } else if (c2 == X0201) {
4134         output_mode = SHIFT_JIS;
4135         (*o_putc)(c1|0x80);
4136     } else if (c2 == ISO8859_1) {
4137         output_mode = ISO8859_1;
4138         (*o_putc)(c1 | 0x080);
4139 #ifdef X0212_ENABLE
4140     } else if (is_eucg3(c2)){
4141         output_mode = SHIFT_JIS;
4142         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4143             (*o_putc)(c2);
4144             (*o_putc)(c1);
4145         }
4146 #endif
4147     } else {
4148         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4149             set_iconv(FALSE, 0);
4150             return; /* too late to rescue this char */
4151         }
4152         output_mode = SHIFT_JIS;
4153         e2s_conv(c2, c1, &c2, &c1);
4154
4155 #ifdef SHIFTJIS_CP932
4156         if (cp932inv_f
4157             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4158             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4159             if (c){
4160                 c2 = c >> 8;
4161                 c1 = c & 0xff;
4162             }
4163         }
4164 #endif /* SHIFTJIS_CP932 */
4165
4166         (*o_putc)(c2);
4167         if (prefix_table[(unsigned char)c1]){
4168             (*o_putc)(prefix_table[(unsigned char)c1]);
4169         }
4170         (*o_putc)(c1);
4171     }
4172 }
4173
4174 void j_oconv(nkf_char c2, nkf_char c1)
4175 {
4176 #ifdef NUMCHAR_OPTION
4177     if (c2 == 0 && is_unicode_capsule(c1)){
4178         w16e_conv(c1, &c2, &c1);
4179         if (c2 == 0 && is_unicode_capsule(c1)){
4180             c2 = c1 & VALUE_MASK;
4181             if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
4182                 /* CP5022x UDC */
4183                 c1 &= 0xFFF;
4184                 c2 = 0x7F + c1 / 94;
4185                 c1 = 0x21 + c1 % 94;
4186             } else {
4187                 if (encode_fallback) (*encode_fallback)(c1);
4188                 return;
4189             }
4190         }
4191     }
4192 #endif
4193     if (c2 == EOF) {
4194         if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4195             (*o_putc)(ESC);
4196             (*o_putc)('(');
4197             (*o_putc)(ascii_intro);
4198             output_mode = ASCII;
4199         }
4200         (*o_putc)(EOF);
4201 #ifdef X0212_ENABLE
4202     } else if (is_eucg3(c2)){
4203         if(x0213_f){
4204             if(output_mode!=X0213_2){
4205                 output_mode = X0213_2;