OSDN Git Service

* Add -g=0, -g=1, --guess=0, --guess=1.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.143 2007/10/10 19:35:39 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-10-11"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
82 void  djgpp_setbinmode(FILE *fp) /* DJGPP builds only: switch the handle behind fp from text to binary mode */
83 {
84     /* We do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);                                      /* handle number underlying the stream */
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY; /* current mode word with O_TEXT cleared and O_BINARY set */
88     __file_handle_set(fd, m);                             /* hand the updated mode word to __file_handle_set() */
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
107 #ifdef OVERWRITE
108 /* added by satoru@isoternet.org */
109 #if defined(__EMX__)
110 #include <sys/types.h>
111 #endif
112 #include <sys/stat.h>
113 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
114 #include <unistd.h>
115 #if defined(__WATCOMC__)
116 #include <sys/utime.h>
117 #else
118 #include <utime.h>
119 #endif
120 #else /* defined(MSDOS) */
121 #ifdef __WIN32__
122 #ifdef __BORLANDC__ /* BCC32 */
123 #include <utime.h>
124 #else /* !defined(__BORLANDC__) */
125 #include <sys/utime.h>
126 #endif /* (__BORLANDC__) */
127 #else /* !defined(__WIN32__) */
128 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
129 #include <sys/utime.h>
130 #elif defined(__TURBOC__) /* BCC */
131 #include <utime.h>
132 #elif defined(LSI_C) /* LSI C */
133 #endif /* (__WIN32__) */
134 #endif
135 #endif
136 #endif
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
190 /* ASCII CODE */
191
192 #define         BS      0x08
193 #define         TAB     0x09
194 #define         LF      0x0a
195 #define         CR      0x0d
196 #define         ESC     0x1b
197 #define         SP      0x20
198 #define         AT      0x40
199 #define         SSP     0xa0
200 #define         DEL     0x7f
201 #define         SI      0x0f
202 #define         SO      0x0e
203 #define         SSO     0x8e
204 #define         SS3     0x8f
205 #define         CRLF    0x0D0A
206
207 #define         is_alnum(c)  \
208             (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
209 /* NOTE: every macro below evaluates its argument more than once; never pass an expression with side effects. */
210 /* I don't trust portability of toupper */
211 #define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
212 #define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
213 #define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
214 #define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
215 #define nkf_isblank(c) ((c) == SP || (c) == TAB)
216 #define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
217 #define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
218 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
219 #define nkf_isprint(c) (SP<=(c) && (c)<='~')
220 #define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
221 #define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
222                     ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
223                     ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0) /* non-hex input yields 0 */
224 #define bin2hex(c) ("0123456789ABCDEF"[(c)&15]) /* low nibble of c as an upper-case hex digit */
225 #define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3) /* true when bits 8-15 of c2 equal SS3 */
226
227 #define CP932_TABLE_BEGIN 0xFA
228 #define CP932_TABLE_END   0xFC
229 #define CP932INV_TABLE_BEGIN 0xED
230 #define CP932INV_TABLE_END   0xEE
231 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
232
233 #define         HOLD_SIZE       1024
234 #if defined(INT_IS_SHORT)
235 #define         IOBUF_SIZE      2048
236 #else
237 #define         IOBUF_SIZE      16384
238 #endif
239
240 #define         DEFAULT_J       'B'
241 #define         DEFAULT_R       'B'
242
243 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
244 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
245
246 #define         RANGE_NUM_MAX   18
247 #define         GETA1   0x22
248 #define         GETA2   0x2e
249
250
251 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
252 #define sizeof_euc_to_utf8_1byte 94
253 #define sizeof_euc_to_utf8_2bytes 94
254 #define sizeof_utf8_to_euc_C2 64
255 #define sizeof_utf8_to_euc_E5B8 64
256 #define sizeof_utf8_to_euc_2bytes 112
257 #define sizeof_utf8_to_euc_3bytes 16
258 #endif
259
260 /* MIME preprocessor */
261
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
264 #endif
265
266 struct input_code{ /* one candidate input encoding: its name plus per-encoding state and callbacks */
267     char *name;          /* encoding name string, e.g. "EUC-JP" in input_code_list */
268     nkf_char stat;       /* NOTE(review): presumably the status function's current state -- confirm */
269     nkf_char score;      /* NOTE(review): presumably a score updated via set_code_score()/clr_code_score() -- confirm */
270     nkf_char index;      /* NOTE(review): presumably the number of values held in buf -- confirm */
271     nkf_char buf[3];     /* small per-encoding buffer of up to three nkf_char values */
272     void (*status_func)(struct input_code *, nkf_char); /* per-encoding status callback; NULL for some entries of input_code_list */
273     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter paired with this encoding */
274     int _file_stat;      /* initialized to 0 in input_code_list; usage not visible in this part of the file */
275 };
276
277 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
278
279 #ifndef PERL_XS
280 static const char *CopyRight = COPY_RIGHT;
281 #endif
282 #if !defined(PERL_XS) && !defined(WIN32DLL)
283 static  nkf_char     noconvert(FILE *f);
284 #endif
285 static  void    module_connection(void);
286 static  nkf_char     kanji_convert(FILE *f);
287 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
288 static  nkf_char     push_hold_buf(nkf_char c2);
289 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
290 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
291 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
292 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
293 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
294 /* UCS Mapping
295  * 0: Shift_JIS, eucJP-ascii
296  * 1: eucJP-ms
297  * 2: CP932, CP51932
298  * 3: CP10001
299  */
300 #define UCS_MAP_ASCII   0
301 #define UCS_MAP_MS      1
302 #define UCS_MAP_CP932   2
303 #define UCS_MAP_CP10001 3
304 static int ms_ucs_map_f = UCS_MAP_ASCII;
305 #endif
306 #ifdef UTF8_INPUT_ENABLE
307 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
308 static  int     no_cp932ext_f = FALSE;
309 /* ignore ZERO WIDTH NO-BREAK SPACE */
310 static  int     no_best_fit_chars_f = FALSE;
311 static  int     input_endian = ENDIAN_BIG;
312 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
313 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
314 static  void    encode_fallback_html(nkf_char c);
315 static  void    encode_fallback_xml(nkf_char c);
316 static  void    encode_fallback_java(nkf_char c);
317 static  void    encode_fallback_perl(nkf_char c);
318 static  void    encode_fallback_subchar(nkf_char c);
319 static  void    (*encode_fallback)(nkf_char c) = NULL;
320 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
321 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
322 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
323 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
324 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
325 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
326 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
327 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
328 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
329 static  void    w_status(struct input_code *, nkf_char);
330 #endif
331 #ifdef UTF8_OUTPUT_ENABLE
332 static  int     output_bom_f = FALSE;
333 static  int     output_endian = ENDIAN_BIG;
334 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
335 static  void    w_oconv(nkf_char c2,nkf_char c1);
336 static  void    w_oconv16(nkf_char c2,nkf_char c1);
337 static  void    w_oconv32(nkf_char c2,nkf_char c1);
338 #endif
339 static  void    e_oconv(nkf_char c2,nkf_char c1);
340 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  void    s_oconv(nkf_char c2,nkf_char c1);
342 static  void    j_oconv(nkf_char c2,nkf_char c1);
343 static  void    fold_conv(nkf_char c2,nkf_char c1);
344 static  void    nl_conv(nkf_char c2,nkf_char c1);
345 static  void    z_conv(nkf_char c2,nkf_char c1);
346 static  void    rot_conv(nkf_char c2,nkf_char c1);
347 static  void    hira_conv(nkf_char c2,nkf_char c1);
348 static  void    base64_conv(nkf_char c2,nkf_char c1);
349 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
350 static  void    no_connection(nkf_char c2,nkf_char c1);
351 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
352
353 static  void    code_score(struct input_code *ptr);
354 static  void    code_status(nkf_char c);
355
356 static  void    std_putc(nkf_char c);
357 static  nkf_char     std_getc(FILE *f);
358 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
359
360 static  nkf_char     broken_getc(FILE *f);
361 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     mime_begin(FILE *f);
364 static  nkf_char     mime_getc(FILE *f);
365 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
366
367 static  void    switch_mime_getc(void);
368 static  void    unswitch_mime_getc(void);
369 static  nkf_char     mime_begin_strict(FILE *f);
370 static  nkf_char     mime_getc_buf(FILE *f);
371 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
372 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
373
374 static  nkf_char     base64decode(nkf_char c);
375 static  void    mime_prechar(nkf_char c2, nkf_char c1);
376 static  void    mime_putc(nkf_char c);
377 static  void    open_mime(nkf_char c);
378 static  void    close_mime(void);
379 static  void    eof_mime(void);
380 static  void    mimeout_addchar(nkf_char c);
381 #ifndef PERL_XS
382 static  void    usage(void);
383 static  void    version(void);
384 #endif
385 static  void    options(unsigned char *c);
386 static  void    reinit(void);
387
388 /* buffers */
389
390 #if !defined(PERL_XS) && !defined(WIN32DLL)
391 static unsigned char   stdibuf[IOBUF_SIZE];
392 static unsigned char   stdobuf[IOBUF_SIZE];
393 #endif
394 static unsigned char   hold_buf[HOLD_SIZE*2];
395 static int             hold_count = 0;
396
397 /* MIME preprocessor fifo */
398
399 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
400 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
401 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
402 static unsigned char           mime_buf[MIME_BUF_SIZE];
403 static unsigned int            mime_top = 0;
404 static unsigned int            mime_last = 0;  /* decoded */
405 static unsigned int            mime_input = 0; /* undecoded */
406 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
407
408 /* flags: file-scope option and state switches */
409 static int             unbuf_f = FALSE;        /* when set, main() turns off stdout buffering with setbuf() */
410 static int             estab_f = FALSE;        /* NOTE(review): presumably "input code established" -- confirm */
411 static int             nop_f = FALSE;          /* when set, main() feeds stdin to noconvert() rather than kanji_convert() */
412 static int             binmode_f = TRUE;       /* binary mode: main() applies setbinmode() to stdout/stdin when TRUE */
413 static int             rot_f = FALSE;          /* rot14/43 mode */
414 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
415 static int             input_f = FALSE;        /* non fixed input code  */
416 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
417 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
418 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
419 static int             mimebuf_f = FALSE;      /* MIME buffered input */
420 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
421 static int             iso8859_f = FALSE;      /* ISO8859 through */
422 static int             mimeout_f = FALSE;       /* base64 mode */
423 #if defined(MSDOS) || defined(__OS2__)
424 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
425 #else
426 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
427 #endif
428 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
429
430 #ifdef UNICODE_NORMALIZATION
431 static int nfc_f = FALSE;
432 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
433 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
434 static nkf_char nfc_getc(FILE *f);
435 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
436 #endif
437
438 #ifdef INPUT_OPTION
439 static int cap_f = FALSE;
440 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
441 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
442 static nkf_char cap_getc(FILE *f);
443 static nkf_char cap_ungetc(nkf_char c,FILE *f);
444
445 static int url_f = FALSE;
446 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
447 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
448 static nkf_char url_getc(FILE *f);
449 static nkf_char url_ungetc(nkf_char c,FILE *f);
450 #endif
451
452 #if defined(INT_IS_SHORT)
453 #define NKF_INT32_C(n)   (n##L)
454 #else
455 #define NKF_INT32_C(n)   (n)
456 #endif
457 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
458 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
459 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
460 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
461 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
462 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
463 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
464
465 #ifdef NUMCHAR_OPTION
466 static int numchar_f = FALSE;
467 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
468 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
469 static nkf_char numchar_getc(FILE *f);
470 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
471 #endif
472
473 #ifdef CHECK_OPTION
474 static int noout_f = FALSE;
475 static void no_putc(nkf_char c);
476 static int debug_f = FALSE;
477 static void debug(const char *str);
478 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
479 #endif
480
481 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
482 #if !defined PERL_XS
483 static  void    print_guessed_code(char *filename);
484 #endif
485 static  void    set_input_codename(char *codename);
486
487 #ifdef EXEC_IO
488 static int exec_f = 0;
489 #endif
490
491 #ifdef SHIFTJIS_CP932
492 /* invert IBM extended characters to others */
493 static int cp51932_f = FALSE;
494
495 /* invert NEC-selected IBM extended characters to IBM extended characters */
496 static int cp932inv_f = TRUE;
497
498 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
499 #endif /* SHIFTJIS_CP932 */
500
501 #ifdef X0212_ENABLE
502 static int x0212_f = FALSE;
503 static nkf_char x0212_shift(nkf_char c);
504 static nkf_char x0212_unshift(nkf_char c);
505 #endif
506 static int x0213_f = FALSE;
507
508 static unsigned char prefix_table[256];
509
510 static void set_code_score(struct input_code *ptr, nkf_char score);
511 static void clr_code_score(struct input_code *ptr, nkf_char score);
512 static void status_disable(struct input_code *ptr);
513 static void status_push_ch(struct input_code *ptr, nkf_char c);
514 static void status_clear(struct input_code *ptr);
515 static void status_reset(struct input_code *ptr);
516 static void status_reinit(struct input_code *ptr);
517 static void status_check(struct input_code *ptr, nkf_char c);
518 static void e_status(struct input_code *, nkf_char);
519 static void s_status(struct input_code *, nkf_char);
520
521 struct input_code input_code_list[] = { /* candidate input encodings; the list ends with an all-zero entry */
522     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
523     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
524 #ifdef UTF8_INPUT_ENABLE /* Unicode entries exist only when UTF-8 input support is compiled in */
525     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
526     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0}, /* no status_func for UTF-16 */
527     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0}, /* no status_func for UTF-32 */
528 #endif
529     {0} /* terminator: name is NULL */
530 };
531
532 static int              mimeout_mode = 0;
533 static int              base64_count = 0;
534
535 /* X0208 -> ASCII converter */
536
537 /* fold parameter */
538 static int             f_line = 0;    /* chars in line */
539 static int             f_prev = 0;
540 static int             fold_preserve_f = FALSE; /* preserve new lines */
541 static int             fold_f  = FALSE;
542 static int             fold_len  = 0;
543
544 /* options */
545 static unsigned char   kanji_intro = DEFAULT_J;
546 static unsigned char   ascii_intro = DEFAULT_R;
547
548 /* Folding */
549
550 #define FOLD_MARGIN  10
551 #define DEFAULT_FOLD 60
552
553 static int             fold_margin  = FOLD_MARGIN;
554
555 /* converters */
556
557 #ifdef DEFAULT_CODE_JIS
558 #   define  DEFAULT_CONV j_oconv
559 #endif
560 #ifdef DEFAULT_CODE_SJIS
561 #   define  DEFAULT_CONV s_oconv
562 #endif
563 #ifdef DEFAULT_CODE_EUC
564 #   define  DEFAULT_CONV e_oconv
565 #endif
566 #ifdef DEFAULT_CODE_UTF8
567 #   define  DEFAULT_CONV w_oconv
568 #endif
569
570 /* process default */
571 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
572
573 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
574 /* s_iconv or oconv */
575 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
576
577 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
578 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
579 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
580 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
584
585 /* static redirections */
586
587 static  void   (*o_putc)(nkf_char c) = std_putc;
588
589 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
590 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
591
592 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
593 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
594
595 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
596
597 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
598 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
599
600 /* for strict mime */
601 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
602 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
603
604 /* Global states */
605 static int output_mode = ASCII,    /* output kanji mode */
606            input_mode =  ASCII,    /* input kanji mode */
607            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
608 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
609
610 /* X0201 / X0208 conversion tables */
611
612 /* X0201 kana conversion table: 65 two-byte entries (130 bytes), ending with a 0x00,0x00 pair. */
613 /* 90-9F A0-DF -- NOTE(review): each pair looks like a two-byte X0208 code for one X0201 kana; confirm the index base at the lookup site. */
614 static const unsigned char cv[]= {
615     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
616     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
617     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
618     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
619     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
620     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
621     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
622     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
623     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
624     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
625     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
626     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
627     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
628     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
629     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
630     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
631     0x00,0x00};
632
633
634 /* X0201 kana conversion table for dakuten (voiced sound mark); same 130-byte layout as cv. */
635 /* 90-9F A0-DF -- NOTE(review): a 0x00,0x00 pair presumably means the kana has no voiced form; confirm at the lookup site. */
636 static const unsigned char dv[]= {
637     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
638     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
639     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
642     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
643     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
644     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
645     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
646     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
647     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
648     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
649     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
650     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
651     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653     0x00,0x00};
654
655 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark); same 130-byte layout as cv. */
656 /* 90-9F A0-DF -- NOTE(review): a 0x00,0x00 pair presumably means the kana has no semi-voiced form; confirm at the lookup site. */
657 static const unsigned char ev[]= {
658     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
660     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
669     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674     0x00,0x00};
675
676
677 /* X0208 kigou (symbol) conversion table: 96 single-byte entries. */
678 /* 0x8140 - 0x819e -- NOTE(review): non-zero entries look like ASCII codes; 0x00 presumably means no ASCII equivalent. Confirm at the lookup site. */
679 static const unsigned char fv[] = {
680
681     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
682     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
683     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
684     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
685     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
686     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
687     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
688     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
689     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
690     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
691     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
692     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
693 } ;
694
695
696
697 static int             file_out_f = FALSE;
698 #ifdef OVERWRITE
699 static int             overwrite_f = FALSE;
700 static int             preserve_time_f = FALSE;
701 static int             backup_f = FALSE;
702 static char            *backup_suffix = "";
703 static char *get_backup_filename(const char *suffix, const char *filename);
704 #endif
705
706 static int nlmode_f = 0;   /* CR, LF, CRLF */
707 static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
708 static nkf_char prev_cr = 0; /* CR or 0 */
709 #ifdef EASYWIN /*Easy Win */
710 static int             end_check;
711 #endif /*Easy Win */
712
713 #define STD_GC_BUFSIZE (256)
714 nkf_char std_gc_buf[STD_GC_BUFSIZE];
715 nkf_char std_gc_ndx;
716
717 #ifdef WIN32DLL
718 #include "nkf32dll.c"
719 #elif defined(PERL_XS)
720 #else /* WIN32DLL */
721 int main(int argc, char **argv)
722 {
723     FILE  *fin;
724     unsigned char  *cp;
725
726     char *outfname = NULL;
727     char *origfname;
728
729 #ifdef EASYWIN /*Easy Win */
730     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
731 #endif
732
733     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
734         cp = (unsigned char *)*argv;
735         options(cp);
736         if (guess_f) {
737 #ifdef CHECK_OPTION
738             int debug_f_back = debug_f;
739 #endif
740 #ifdef EXEC_IO
741             int exec_f_back = exec_f;
742 #endif
743 #ifdef X0212_ENABLE
744             int x0212_f_back = x0212_f;
745 #endif
746 #ifdef X0212_ENABLE
747             int x0213_f_back = x0213_f;
748 #endif
749             int guess_f_back = guess_f;
750             reinit();
751             guess_f = guess_f_back;
752             mime_f = FALSE;
753 #ifdef CHECK_OPTION
754             debug_f = debug_f_back;
755 #endif
756 #ifdef EXEC_IO
757             exec_f = exec_f_back;
758 #endif
759 #ifdef X0212_ENABLE
760             x0212_f = x0212_f_back;
761 #endif
762 #ifdef X0213_ENABLE
763             x0213_f = x0213_f_back;
764 #endif
765     }
766 #ifdef EXEC_IO
767         if (exec_f){
768             int fds[2], pid;
769             if (pipe(fds) < 0 || (pid = fork()) < 0){
770                 abort();
771             }
772             if (pid == 0){
773                 if (exec_f > 0){
774                     close(fds[0]);
775                     dup2(fds[1], 1);
776                 }else{
777                     close(fds[1]);
778                     dup2(fds[0], 0);
779                 }
780                 execvp(argv[1], &argv[1]);
781             }
782             if (exec_f > 0){
783                 close(fds[1]);
784                 dup2(fds[0], 0);
785             }else{
786                 close(fds[0]);
787                 dup2(fds[1], 1);
788             }
789             argc = 0;
790             break;
791         }
792 #endif
793     }
794     if(x0201_f == WISH_TRUE)
795          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
796
797     if (binmode_f == TRUE)
798 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
799     if (freopen("","wb",stdout) == NULL)
800         return (-1);
801 #else
802     setbinmode(stdout);
803 #endif
804
805     if (unbuf_f)
806       setbuf(stdout, (char *) NULL);
807     else
808       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
809
810     if (argc == 0) {
811       if (binmode_f == TRUE)
812 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
813       if (freopen("","rb",stdin) == NULL) return (-1);
814 #else
815       setbinmode(stdin);
816 #endif
817       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
818       if (nop_f)
819           noconvert(stdin);
820       else {
821           kanji_convert(stdin);
822           if (guess_f) print_guessed_code(NULL);
823       }
824     } else {
825       int nfiles = argc;
826         int is_argument_error = FALSE;
827       while (argc--) {
828             input_codename = NULL;
829             input_nextline = 0;
830 #ifdef CHECK_OPTION
831             iconv_for_check = 0;
832 #endif
833           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
834               perror(*--argv);
835                 *argv++;
836                 is_argument_error = TRUE;
837                 continue;
838           } else {
839 #ifdef OVERWRITE
840               int fd = 0;
841               int fd_backup = 0;
842 #endif
843
844 /* reopen file for stdout */
845               if (file_out_f == TRUE) {
846 #ifdef OVERWRITE
847                   if (overwrite_f){
848                       outfname = malloc(strlen(origfname)
849                                         + strlen(".nkftmpXXXXXX")
850                                         + 1);
851                       if (!outfname){
852                           perror(origfname);
853                           return -1;
854                       }
855                       strcpy(outfname, origfname);
856 #ifdef MSDOS
857                       {
858                           int i;
859                           for (i = strlen(outfname); i; --i){
860                               if (outfname[i - 1] == '/'
861                                   || outfname[i - 1] == '\\'){
862                                   break;
863                               }
864                           }
865                           outfname[i] = '\0';
866                       }
867                       strcat(outfname, "ntXXXXXX");
868                       mktemp(outfname);
869                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
870                                 S_IREAD | S_IWRITE);
871 #else
872                       strcat(outfname, ".nkftmpXXXXXX");
873                       fd = mkstemp(outfname);
874 #endif
875                       if (fd < 0
876                           || (fd_backup = dup(fileno(stdout))) < 0
877                           || dup2(fd, fileno(stdout)) < 0
878                           ){
879                           perror(origfname);
880                           return -1;
881                       }
882                   }else
883 #endif
884                   if(argc == 1) {
885                       outfname = *argv++;
886                       argc--;
887                   } else {
888                       outfname = "nkf.out";
889                   }
890
891                   if(freopen(outfname, "w", stdout) == NULL) {
892                       perror (outfname);
893                       return (-1);
894                   }
895                   if (binmode_f == TRUE) {
896 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
897                       if (freopen("","wb",stdout) == NULL)
898                            return (-1);
899 #else
900                       setbinmode(stdout);
901 #endif
902                   }
903               }
904               if (binmode_f == TRUE)
905 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
906                  if (freopen("","rb",fin) == NULL)
907                     return (-1);
908 #else
909                  setbinmode(fin);
910 #endif
911               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
912               if (nop_f)
913                   noconvert(fin);
914               else {
915                   char *filename = NULL;
916                   kanji_convert(fin);
917                   if (nfiles > 1) filename = origfname;
918                   if (guess_f) print_guessed_code(filename);
919               }
920               fclose(fin);
921 #ifdef OVERWRITE
922               if (overwrite_f) {
923                   struct stat     sb;
924 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
925                   time_t tb[2];
926 #else
927                   struct utimbuf  tb;
928 #endif
929
930                   fflush(stdout);
931                   close(fd);
932                   if (dup2(fd_backup, fileno(stdout)) < 0){
933                       perror("dup2");
934                   }
935                   if (stat(origfname, &sb)) {
936                       fprintf(stderr, "Can't stat %s\n", origfname);
937                   }
938                   /* Restore the original file's permission bits on the new file. */
939                   if (chmod(outfname, sb.st_mode)) {
940                       fprintf(stderr, "Can't set permission %s\n", outfname);
941                   }
942
943                   /* Restore the original file's timestamps on the new file. */
944                     if(preserve_time_f){
945 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
946                         tb[0] = tb[1] = sb.st_mtime;
947                         if (utime(outfname, tb)) {
948                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
949                         }
950 #else
951                         tb.actime  = sb.st_atime;
952                         tb.modtime = sb.st_mtime;
953                         if (utime(outfname, &tb)) {
954                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
955                         }
956 #endif
957                     }
958                     if(backup_f){
959                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
960 #ifdef MSDOS
961                         unlink(backup_filename);
962 #endif
963                         if (rename(origfname, backup_filename)) {
964                             perror(backup_filename);
965                             fprintf(stderr, "Can't rename %s to %s\n",
966                                     origfname, backup_filename);
967                         }
968                     }else{
969 #ifdef MSDOS
970                         if (unlink(origfname)){
971                             perror(origfname);
972                         }
973 #endif
974                     }
975                   if (rename(outfname, origfname)) {
976                       perror(origfname);
977                       fprintf(stderr, "Can't rename %s to %s\n",
978                               outfname, origfname);
979                   }
980                   free(outfname);
981               }
982 #endif
983           }
984       }
985         if (is_argument_error)
986             return(-1);
987     }
988 #ifdef EASYWIN /*Easy Win */
989     if (file_out_f == FALSE)
990         scanf("%d",&end_check);
991     else
992         fclose(stdout);
993 #else /* for Other OS */
994     if (file_out_f == TRUE)
995         fclose(stdout);
996 #endif /*Easy Win */
997     return (0);
998 }
999 #endif /* WIN32DLL */
1000
1001 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename` from `suffix`, the
 * value given to --overwrite= or --in-place=.
 *
 * When `suffix` contains one or more '*' characters, every '*' is replaced
 * by `filename` and all other characters are copied unchanged, e.g.
 * suffix "old-*~" and filename "foo" give "old-foo~".  When it contains
 * no '*', `suffix` is appended to `filename`, e.g. ".bak" and "foo" give
 * "foo.bak".
 *
 * Returns a NUL-terminated string obtained from malloc(), or NULL after
 * reporting through perror() when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Each '*' takes one byte of the suffix and expands to the whole
         * filename; the remaining suffix bytes are copied one to one. */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
    } else {
        /* Plain concatenation: filename, then suffix, then the NUL. */
        backup_filename = malloc(filename_length + suffix_length + 1);
    }
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminating NUL. */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1040 #endif
1041
/*
 * Long ("--name") options recognised by options().
 *
 *   name   option text without the leading "--".  A trailing '=' marks an
 *          option that is written as "--name=value".
 *   alias  short option letters that options() parses in place of the
 *          long option.  An empty string means there is no short form and
 *          options() handles the option by comparing its name.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",                "" },
    { "oc=",                "" },
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess=",             "" },
    { "guess",              "g" },
    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/*
 * Set to 1 by options() when it meets a bare "--"; while it is 1,
 * options() returns immediately without parsing its argument.
 */
static int option_mode = 0;
1123
1124 void options(unsigned char *cp)
1125 {
1126     nkf_char i, j;
1127     unsigned char *p;
1128     unsigned char *cp_back = NULL;
1129     char codeset[32];
1130
1131     if (option_mode==1)
1132         return;
1133     while(*cp && *cp++!='-');
1134     while (*cp || cp_back) {
1135         if(!*cp){
1136             cp = cp_back;
1137             cp_back = NULL;
1138             continue;
1139         }
1140         p = 0;
1141         switch (*cp++) {
1142         case '-':  /* literal options */
1143             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1144                 option_mode = 1;
1145                 return;
1146             }
1147             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1148                 p = (unsigned char *)long_option[i].name;
1149                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1150                 if (*p == cp[j] || cp[j] == SP){
1151                     p = &cp[j] + 1;
1152                     break;
1153                 }
1154                 p = 0;
1155             }
1156             if (p == 0) {
1157                 fprintf(stderr, "unknown long option: --%s\n", cp);
1158                 return;
1159             }
1160             while(*cp && *cp != SP && cp++);
1161             if (long_option[i].alias[0]){
1162                 cp_back = cp;
1163                 cp = (unsigned char *)long_option[i].alias;
1164             }else{
1165                 if (strcmp(long_option[i].name, "ic=") == 0){
1166                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1167                         codeset[i] = nkf_toupper(p[i]);
1168                     }
1169                     codeset[i] = 0;
1170                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1171                         input_f = JIS_INPUT;
1172                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1173                       strcmp(codeset, "CP50220") == 0 ||
1174                       strcmp(codeset, "CP50221") == 0 ||
1175                       strcmp(codeset, "CP50222") == 0){
1176                         input_f = JIS_INPUT;
1177 #ifdef SHIFTJIS_CP932
1178                         cp51932_f = TRUE;
1179 #endif
1180 #ifdef UTF8_OUTPUT_ENABLE
1181                         ms_ucs_map_f = UCS_MAP_CP932;
1182 #endif
1183                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1184                         input_f = JIS_INPUT;
1185 #ifdef X0212_ENABLE
1186                         x0212_f = TRUE;
1187 #endif
1188                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1189                         input_f = JIS_INPUT;
1190 #ifdef X0212_ENABLE
1191                         x0212_f = TRUE;
1192 #endif
1193                         x0213_f = TRUE;
1194                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1195                         input_f = SJIS_INPUT;
1196                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1197                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1198                              strcmp(codeset, "CP932") == 0 ||
1199                              strcmp(codeset, "MS932") == 0){
1200                         input_f = SJIS_INPUT;
1201 #ifdef SHIFTJIS_CP932
1202                         cp51932_f = TRUE;
1203 #endif
1204 #ifdef UTF8_OUTPUT_ENABLE
1205                         ms_ucs_map_f = UCS_MAP_CP932;
1206 #endif
1207                     }else if(strcmp(codeset, "CP10001") == 0){
1208                         input_f = SJIS_INPUT;
1209 #ifdef SHIFTJIS_CP932
1210                         cp51932_f = TRUE;
1211 #endif
1212 #ifdef UTF8_OUTPUT_ENABLE
1213                         ms_ucs_map_f = UCS_MAP_CP10001;
1214 #endif
1215                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1216                              strcmp(codeset, "EUC-JP") == 0){
1217                         input_f = EUC_INPUT;
1218                     }else if(strcmp(codeset, "CP51932") == 0){
1219                         input_f = EUC_INPUT;
1220 #ifdef SHIFTJIS_CP932
1221                         cp51932_f = TRUE;
1222 #endif
1223 #ifdef UTF8_OUTPUT_ENABLE
1224                         ms_ucs_map_f = UCS_MAP_CP932;
1225 #endif
1226                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1227                              strcmp(codeset, "EUCJP-MS") == 0 ||
1228                              strcmp(codeset, "EUCJPMS") == 0){
1229                         input_f = EUC_INPUT;
1230 #ifdef SHIFTJIS_CP932
1231                         cp51932_f = FALSE;
1232 #endif
1233 #ifdef UTF8_OUTPUT_ENABLE
1234                         ms_ucs_map_f = UCS_MAP_MS;
1235 #endif
1236                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1237                              strcmp(codeset, "EUCJP-ASCII") == 0){
1238                         input_f = EUC_INPUT;
1239 #ifdef SHIFTJIS_CP932
1240                         cp51932_f = FALSE;
1241 #endif
1242 #ifdef UTF8_OUTPUT_ENABLE
1243                         ms_ucs_map_f = UCS_MAP_ASCII;
1244 #endif
1245                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1246                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1247                         input_f = SJIS_INPUT;
1248                         x0213_f = TRUE;
1249 #ifdef SHIFTJIS_CP932
1250                         cp51932_f = FALSE;
1251 #endif
1252                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1253                              strcmp(codeset, "EUC-JIS-2004") == 0){
1254                         input_f = EUC_INPUT;
1255                         x0213_f = TRUE;
1256 #ifdef SHIFTJIS_CP932
1257                         cp51932_f = FALSE;
1258 #endif
1259 #ifdef UTF8_INPUT_ENABLE
1260                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1261                              strcmp(codeset, "UTF-8N") == 0 ||
1262                              strcmp(codeset, "UTF-8-BOM") == 0){
1263                         input_f = UTF8_INPUT;
1264 #ifdef UNICODE_NORMALIZATION
1265                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1266                              strcmp(codeset, "UTF-8-MAC") == 0){
1267                         input_f = UTF8_INPUT;
1268                         nfc_f = TRUE;
1269 #endif
1270                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1271                              strcmp(codeset, "UTF-16BE") == 0 ||
1272                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1273                         input_f = UTF16_INPUT;
1274                         input_endian = ENDIAN_BIG;
1275                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1276                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1277                         input_f = UTF16_INPUT;
1278                         input_endian = ENDIAN_LITTLE;
1279                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1280                              strcmp(codeset, "UTF-32BE") == 0 ||
1281                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1282                         input_f = UTF32_INPUT;
1283                         input_endian = ENDIAN_BIG;
1284                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1285                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1286                         input_f = UTF32_INPUT;
1287                         input_endian = ENDIAN_LITTLE;
1288 #endif
1289                     } else {
1290                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1291                     }
1292                     continue;
1293                 }
1294                 if (strcmp(long_option[i].name, "oc=") == 0){
1295                     x0201_f = FALSE;
1296                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1297                         codeset[i] = nkf_toupper(p[i]);
1298                     }
1299                     codeset[i] = 0;
1300                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1301                         output_conv = j_oconv;
1302                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1303                         output_conv = j_oconv;
1304                         no_cp932ext_f = TRUE;
1305 #ifdef SHIFTJIS_CP932
1306                         cp932inv_f = FALSE;
1307 #endif
1308 #ifdef UTF8_OUTPUT_ENABLE
1309                         ms_ucs_map_f = UCS_MAP_CP932;
1310 #endif
1311                     }else if(strcmp(codeset, "CP50220") == 0){
1312                         output_conv = j_oconv;
1313                         x0201_f = TRUE;
1314 #ifdef SHIFTJIS_CP932
1315                         cp932inv_f = FALSE;
1316 #endif
1317 #ifdef UTF8_OUTPUT_ENABLE
1318                         ms_ucs_map_f = UCS_MAP_CP932;
1319 #endif
1320                     }else if(strcmp(codeset, "CP50221") == 0){
1321                         output_conv = j_oconv;
1322 #ifdef SHIFTJIS_CP932
1323                         cp932inv_f = FALSE;
1324 #endif
1325 #ifdef UTF8_OUTPUT_ENABLE
1326                         ms_ucs_map_f = UCS_MAP_CP932;
1327 #endif
1328                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1329                         output_conv = j_oconv;
1330 #ifdef X0212_ENABLE
1331                         x0212_f = TRUE;
1332 #endif
1333 #ifdef SHIFTJIS_CP932
1334                         cp932inv_f = FALSE;
1335 #endif
1336                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1337                         output_conv = j_oconv;
1338 #ifdef X0212_ENABLE
1339                         x0212_f = TRUE;
1340 #endif
1341                         x0213_f = TRUE;
1342 #ifdef SHIFTJIS_CP932
1343                         cp932inv_f = FALSE;
1344 #endif
1345                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1346                         output_conv = s_oconv;
1347                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1348                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1349                              strcmp(codeset, "CP932") == 0 ||
1350                              strcmp(codeset, "MS932") == 0){
1351                         output_conv = s_oconv;
1352 #ifdef UTF8_OUTPUT_ENABLE
1353                         ms_ucs_map_f = UCS_MAP_CP932;
1354 #endif
1355                     }else if(strcmp(codeset, "CP10001") == 0){
1356                         output_conv = s_oconv;
1357 #ifdef UTF8_OUTPUT_ENABLE
1358                         ms_ucs_map_f = UCS_MAP_CP10001;
1359 #endif
1360                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1361                              strcmp(codeset, "EUC-JP") == 0){
1362                         output_conv = e_oconv;
1363                     }else if(strcmp(codeset, "CP51932") == 0){
1364                         output_conv = e_oconv;
1365 #ifdef SHIFTJIS_CP932
1366                         cp932inv_f = FALSE;
1367 #endif
1368 #ifdef UTF8_OUTPUT_ENABLE
1369                         ms_ucs_map_f = UCS_MAP_CP932;
1370 #endif
1371                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1372                              strcmp(codeset, "EUCJP-MS") == 0 ||
1373                              strcmp(codeset, "EUCJPMS") == 0){
1374                         output_conv = e_oconv;
1375 #ifdef X0212_ENABLE
1376                         x0212_f = TRUE;
1377 #endif
1378 #ifdef UTF8_OUTPUT_ENABLE
1379                         ms_ucs_map_f = UCS_MAP_MS;
1380 #endif
1381                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1382                              strcmp(codeset, "EUCJP-ASCII") == 0){
1383                         output_conv = e_oconv;
1384 #ifdef X0212_ENABLE
1385                         x0212_f = TRUE;
1386 #endif
1387 #ifdef UTF8_OUTPUT_ENABLE
1388                         ms_ucs_map_f = UCS_MAP_ASCII;
1389 #endif
1390                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1391                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1392                         output_conv = s_oconv;
1393                         x0213_f = TRUE;
1394 #ifdef SHIFTJIS_CP932
1395                         cp932inv_f = FALSE;
1396 #endif
1397                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1398                              strcmp(codeset, "EUC-JIS-2004") == 0){
1399                         output_conv = e_oconv;
1400 #ifdef X0212_ENABLE
1401                         x0212_f = TRUE;
1402 #endif
1403                         x0213_f = TRUE;
1404 #ifdef SHIFTJIS_CP932
1405                         cp932inv_f = FALSE;
1406 #endif
1407 #ifdef UTF8_OUTPUT_ENABLE
1408                     }else if(strcmp(codeset, "UTF-8") == 0){
1409                         output_conv = w_oconv;
1410                     }else if(strcmp(codeset, "UTF-8N") == 0){
1411                         output_conv = w_oconv;
1412                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1413                         output_conv = w_oconv;
1414                         output_bom_f = TRUE;
1415                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1416                         output_conv = w_oconv16;
1417                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1418                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1419                         output_conv = w_oconv16;
1420                         output_bom_f = TRUE;
1421                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1422                         output_conv = w_oconv16;
1423                         output_endian = ENDIAN_LITTLE;
1424                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1425                         output_conv = w_oconv16;
1426                         output_endian = ENDIAN_LITTLE;
1427                         output_bom_f = TRUE;
1428                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1429                              strcmp(codeset, "UTF-32BE") == 0){
1430                         output_conv = w_oconv32;
1431                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1432                         output_conv = w_oconv32;
1433                         output_bom_f = TRUE;
1434                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1435                         output_conv = w_oconv32;
1436                         output_endian = ENDIAN_LITTLE;
1437                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1438                         output_conv = w_oconv32;
1439                         output_endian = ENDIAN_LITTLE;
1440                         output_bom_f = TRUE;
1441 #endif
1442                     } else {
1443                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1444                     }
1445                     continue;
1446                 }
1447                 if (strcmp(long_option[i].name, "guess=") == 0){
1448                     if (p[0] == '1') {
1449                         guess_f = 2;
1450                     } else {
1451                         guess_f = 1;
1452                     }
1453                     continue;
1454                 }
1455 #ifdef OVERWRITE
1456                 if (strcmp(long_option[i].name, "overwrite") == 0){
1457                     file_out_f = TRUE;
1458                     overwrite_f = TRUE;
1459                     preserve_time_f = TRUE;
1460                     continue;
1461                 }
1462                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1463                     file_out_f = TRUE;
1464                     overwrite_f = TRUE;
1465                     preserve_time_f = TRUE;
1466                     backup_f = TRUE;
1467                     backup_suffix = malloc(strlen((char *) p) + 1);
1468                     strcpy(backup_suffix, (char *) p);
1469                     continue;
1470                 }
1471                 if (strcmp(long_option[i].name, "in-place") == 0){
1472                     file_out_f = TRUE;
1473                     overwrite_f = TRUE;
1474                     preserve_time_f = FALSE;
1475                     continue;
1476                 }
1477                 if (strcmp(long_option[i].name, "in-place=") == 0){
1478                     file_out_f = TRUE;
1479                     overwrite_f = TRUE;
1480                     preserve_time_f = FALSE;
1481                     backup_f = TRUE;
1482                     backup_suffix = malloc(strlen((char *) p) + 1);
1483                     strcpy(backup_suffix, (char *) p);
1484                     continue;
1485                 }
1486 #endif
1487 #ifdef INPUT_OPTION
1488                 if (strcmp(long_option[i].name, "cap-input") == 0){
1489                     cap_f = TRUE;
1490                     continue;
1491                 }
1492                 if (strcmp(long_option[i].name, "url-input") == 0){
1493                     url_f = TRUE;
1494                     continue;
1495                 }
1496 #endif
1497 #ifdef NUMCHAR_OPTION
1498                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1499                     numchar_f = TRUE;
1500                     continue;
1501                 }
1502 #endif
1503 #ifdef CHECK_OPTION
1504                 if (strcmp(long_option[i].name, "no-output") == 0){
1505                     noout_f = TRUE;
1506                     continue;
1507                 }
1508                 if (strcmp(long_option[i].name, "debug") == 0){
1509                     debug_f = TRUE;
1510                     continue;
1511                 }
1512 #endif
1513                 if (strcmp(long_option[i].name, "cp932") == 0){
1514 #ifdef SHIFTJIS_CP932
1515                     cp51932_f = TRUE;
1516                     cp932inv_f = TRUE;
1517 #endif
1518 #ifdef UTF8_OUTPUT_ENABLE
1519                     ms_ucs_map_f = UCS_MAP_CP932;
1520 #endif
1521                     continue;
1522                 }
1523                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1524 #ifdef SHIFTJIS_CP932
1525                     cp51932_f = FALSE;
1526                     cp932inv_f = FALSE;
1527 #endif
1528 #ifdef UTF8_OUTPUT_ENABLE
1529                     ms_ucs_map_f = UCS_MAP_ASCII;
1530 #endif
1531                     continue;
1532                 }
1533 #ifdef SHIFTJIS_CP932
1534                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1535                     cp932inv_f = TRUE;
1536                     continue;
1537                 }
1538 #endif
1539
1540 #ifdef X0212_ENABLE
1541                 if (strcmp(long_option[i].name, "x0212") == 0){
1542                     x0212_f = TRUE;
1543                     continue;
1544                 }
1545 #endif
1546
1547 #ifdef EXEC_IO
1548                   if (strcmp(long_option[i].name, "exec-in") == 0){
1549                       exec_f = 1;
1550                       return;
1551                   }
1552                   if (strcmp(long_option[i].name, "exec-out") == 0){
1553                       exec_f = -1;
1554                       return;
1555                   }
1556 #endif
1557 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1558                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1559                     no_cp932ext_f = TRUE;
1560                     continue;
1561                 }
1562                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1563                     no_best_fit_chars_f = TRUE;
1564                     continue;
1565                 }
1566                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1567                     encode_fallback = NULL;
1568                     continue;
1569                 }
1570                 if (strcmp(long_option[i].name, "fb-html") == 0){
1571                     encode_fallback = encode_fallback_html;
1572                     continue;
1573                 }
1574                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1575                     encode_fallback = encode_fallback_xml;
1576                     continue;
1577                 }
1578                 if (strcmp(long_option[i].name, "fb-java") == 0){
1579                     encode_fallback = encode_fallback_java;
1580                     continue;
1581                 }
1582                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1583                     encode_fallback = encode_fallback_perl;
1584                     continue;
1585                 }
1586                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1587                     encode_fallback = encode_fallback_subchar;
1588                     continue;
1589                 }
1590                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1591                     encode_fallback = encode_fallback_subchar;
1592                     unicode_subchar = 0;
1593                     if (p[0] != '0'){
1594                         /* decimal number */
1595                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1596                             unicode_subchar *= 10;
1597                             unicode_subchar += hex2bin(p[i]);
1598                         }
1599                     }else if(p[1] == 'x' || p[1] == 'X'){
1600                         /* hexadecimal number */
1601                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1602                             unicode_subchar <<= 4;
1603                             unicode_subchar |= hex2bin(p[i]);
1604                         }
1605                     }else{
1606                         /* octal number */
1607                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1608                             unicode_subchar *= 8;
1609                             unicode_subchar += hex2bin(p[i]);
1610                         }
1611                     }
1612                     w16e_conv(unicode_subchar, &i, &j);
1613                     unicode_subchar = i<<8 | j;
1614                     continue;
1615                 }
1616 #endif
1617 #ifdef UTF8_OUTPUT_ENABLE
1618                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1619                     ms_ucs_map_f = UCS_MAP_MS;
1620                     continue;
1621                 }
1622 #endif
1623 #ifdef UNICODE_NORMALIZATION
1624                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1625                     input_f = UTF8_INPUT;
1626                     nfc_f = TRUE;
1627                     continue;
1628                 }
1629 #endif
1630                 if (strcmp(long_option[i].name, "prefix=") == 0){
1631                     if (nkf_isgraph(p[0])){
1632                         for (i = 1; nkf_isgraph(p[i]); i++){
1633                             prefix_table[p[i]] = p[0];
1634                         }
1635                     }
1636                     continue;
1637                 }
1638             }
1639             continue;
1640         case 'b':           /* buffered mode */
1641             unbuf_f = FALSE;
1642             continue;
1643         case 'u':           /* non bufferd mode */
1644             unbuf_f = TRUE;
1645             continue;
1646         case 't':           /* transparent mode */
1647             if (*cp=='1') {
1648                 /* alias of -t */
1649                 nop_f = TRUE;
1650                 *cp++;
1651             } else if (*cp=='2') {
1652                 /*
1653                  * -t with put/get
1654                  *
1655                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1656                  *
1657                  */
1658                 nop_f = 2;
1659                 *cp++;
1660             } else
1661                 nop_f = TRUE;
1662             continue;
1663         case 'j':           /* JIS output */
1664         case 'n':
1665             output_conv = j_oconv;
1666             continue;
1667         case 'e':           /* AT&T EUC output */
1668             output_conv = e_oconv;
1669             cp932inv_f = FALSE;
1670             continue;
1671         case 's':           /* SJIS output */
1672             output_conv = s_oconv;
1673             continue;
1674         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1675             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1676             input_f = LATIN1_INPUT;
1677             continue;
1678         case 'i':           /* Kanji IN ESC-$-@/B */
1679             if (*cp=='@'||*cp=='B')
1680                 kanji_intro = *cp++;
1681             continue;
1682         case 'o':           /* ASCII IN ESC-(-J/B */
1683             if (*cp=='J'||*cp=='B'||*cp=='H')
1684                 ascii_intro = *cp++;
1685             continue;
1686         case 'h':
1687             /*
1688                 bit:1   katakana->hiragana
1689                 bit:2   hiragana->katakana
1690             */
1691             if ('9'>= *cp && *cp>='0')
1692                 hira_f |= (*cp++ -'0');
1693             else
1694                 hira_f |= 1;
1695             continue;
1696         case 'r':
1697             rot_f = TRUE;
1698             continue;
1699 #if defined(MSDOS) || defined(__OS2__)
1700         case 'T':
1701             binmode_f = FALSE;
1702             continue;
1703 #endif
1704 #ifndef PERL_XS
1705         case 'V':
1706             version();
1707             exit(1);
1708             break;
1709         case 'v':
1710             usage();
1711             exit(1);
1712             break;
1713 #endif
1714 #ifdef UTF8_OUTPUT_ENABLE
1715         case 'w':           /* UTF-8 output */
1716             if (cp[0] == '8') {
1717                 output_conv = w_oconv; cp++;
1718                 if (cp[0] == '0'){
1719                     cp++;
1720                 } else {
1721                     output_bom_f = TRUE;
1722                 }
1723             } else {
1724                 if ('1'== cp[0] && '6'==cp[1]) {
1725                     output_conv = w_oconv16; cp+=2;
1726                 } else if ('3'== cp[0] && '2'==cp[1]) {
1727                     output_conv = w_oconv32; cp+=2;
1728                 } else {
1729                     output_conv = w_oconv;
1730                     continue;
1731                 }
1732                 if (cp[0]=='L') {
1733                     cp++;
1734                     output_endian = ENDIAN_LITTLE;
1735                 } else if (cp[0] == 'B') {
1736                     cp++;
1737                 } else {
1738                     continue;
1739                 }
1740                 if (cp[0] == '0'){
1741                     cp++;
1742                 } else {
1743                     output_bom_f = TRUE;
1744                 }
1745             }
1746             continue;
1747 #endif
1748 #ifdef UTF8_INPUT_ENABLE
1749         case 'W':           /* UTF input */
1750             if (cp[0] == '8') {
1751                 cp++;
1752                 input_f = UTF8_INPUT;
1753             }else{
1754                 if ('1'== cp[0] && '6'==cp[1]) {
1755                     cp += 2;
1756                     input_f = UTF16_INPUT;
1757                     input_endian = ENDIAN_BIG;
1758                 } else if ('3'== cp[0] && '2'==cp[1]) {
1759                     cp += 2;
1760                     input_f = UTF32_INPUT;
1761                     input_endian = ENDIAN_BIG;
1762                 } else {
1763                     input_f = UTF8_INPUT;
1764                     continue;
1765                 }
1766                 if (cp[0]=='L') {
1767                     cp++;
1768                     input_endian = ENDIAN_LITTLE;
1769                 } else if (cp[0] == 'B') {
1770                     cp++;
1771                 }
1772             }
1773             continue;
1774 #endif
1775         /* Input code assumption */
1776         case 'J':   /* JIS input */
1777             input_f = JIS_INPUT;
1778             continue;
1779         case 'E':   /* AT&T EUC input */
1780             input_f = EUC_INPUT;
1781             continue;
1782         case 'S':   /* MS Kanji input */
1783             input_f = SJIS_INPUT;
1784             if (x0201_f==NO_X0201) x0201_f=TRUE;
1785             continue;
1786         case 'Z':   /* Convert X0208 alphabet to asii */
1787             /* alpha_f
1788                bit:0   Convert JIS X 0208 Alphabet to ASCII
1789                bit:1   Convert Kankaku to one space
1790                bit:2   Convert Kankaku to two spaces
1791                bit:3   Convert HTML Entity
1792                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1793             */
1794             while ('0'<= *cp && *cp <='9') {
1795                 alpha_f |= 1 << (*cp++ - '0');
1796             }
1797             if (!alpha_f) alpha_f = 1;
1798             continue;
1799         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1800             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1801             /* accept  X0201
1802                     ESC-(-I     in JIS, EUC, MS Kanji
1803                     SI/SO       in JIS, EUC, MS Kanji
1804                     SSO         in EUC, JIS, not in MS Kanji
1805                     MS Kanji (0xa0-0xdf)
1806                output  X0201
1807                     ESC-(-I     in JIS (0x20-0x5f)
1808                     SSO         in EUC (0xa0-0xdf)
1809                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1810             */
1811             continue;
1812         case 'X':   /* Assume X0201 kana */
1813             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1814             x0201_f = TRUE;
1815             continue;
1816         case 'F':   /* prserve new lines */
1817             fold_preserve_f = TRUE;
1818         case 'f':   /* folding -f60 or -f */
1819             fold_f = TRUE;
1820             fold_len = 0;
1821             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1822                 fold_len *= 10;
1823                 fold_len += *cp++ - '0';
1824             }
1825             if (!(0<fold_len && fold_len<BUFSIZ))
1826                 fold_len = DEFAULT_FOLD;
1827             if (*cp=='-') {
1828                 fold_margin = 0;
1829                 cp++;
1830                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1831                     fold_margin *= 10;
1832                     fold_margin += *cp++ - '0';
1833                 }
1834             }
1835             continue;
1836         case 'm':   /* MIME support */
1837             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1838             if (*cp=='B'||*cp=='Q') {
1839                 mime_decode_mode = *cp++;
1840                 mimebuf_f = FIXED_MIME;
1841             } else if (*cp=='N') {
1842                 mime_f = TRUE; cp++;
1843             } else if (*cp=='S') {
1844                 mime_f = STRICT_MIME; cp++;
1845             } else if (*cp=='0') {
1846                 mime_decode_f = FALSE;
1847                 mime_f = FALSE; cp++;
1848             }
1849             continue;
1850         case 'M':   /* MIME output */
1851             if (*cp=='B') {
1852                 mimeout_mode = 'B';
1853                 mimeout_f = FIXED_MIME; cp++;
1854             } else if (*cp=='Q') {
1855                 mimeout_mode = 'Q';
1856                 mimeout_f = FIXED_MIME; cp++;
1857             } else {
1858                 mimeout_f = TRUE;
1859             }
1860             continue;
1861         case 'B':   /* Broken JIS support */
1862             /*  bit:0   no ESC JIS
1863                 bit:1   allow any x on ESC-(-x or ESC-$-x
1864                 bit:2   reset to ascii on NL
1865             */
1866             if ('9'>= *cp && *cp>='0')
1867                 broken_f |= 1<<(*cp++ -'0');
1868             else
1869                 broken_f |= TRUE;
1870             continue;
1871 #ifndef PERL_XS
1872         case 'O':/* for Output file */
1873             file_out_f = TRUE;
1874             continue;
1875 #endif
1876         case 'c':/* add cr code */
1877             nlmode_f = CRLF;
1878             continue;
1879         case 'd':/* delete cr code */
1880             nlmode_f = LF;
1881             continue;
1882         case 'I':   /* ISO-2022-JP output */
1883             iso2022jp_f = TRUE;
1884             continue;
1885         case 'L':  /* line mode */
1886             if (*cp=='u') {         /* unix */
1887                 nlmode_f = LF; cp++;
1888             } else if (*cp=='m') { /* mac */
1889                 nlmode_f = CR; cp++;
1890             } else if (*cp=='w') { /* windows */
1891                 nlmode_f = CRLF; cp++;
1892             } else if (*cp=='0') { /* no conversion  */
1893                 nlmode_f = 0; cp++;
1894             }
1895             continue;
1896 #ifndef PERL_XS
1897         case 'g':
1898             if (*cp == '1') {
1899                 guess_f = 2;
1900                 cp++;
1901             } else if (*cp == '0') {
1902                 guess_f = 1;
1903                 cp++;
1904             } else {
1905                 guess_f = 1;
1906             }
1907             continue;
1908 #endif
1909         case SP:
1910         /* module muliple options in a string are allowed for Perl moudle  */
1911             while(*cp && *cp++!='-');
1912             continue;
1913         default:
1914             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1915             /* bogus option but ignored */
1916             continue;
1917         }
1918     }
1919 }
1920
1921 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1922 {
1923     if (iconv_func){
1924         struct input_code *p = input_code_list;
1925         while (p->name){
1926             if (iconv_func == p->iconv_func){
1927                 return p;
1928             }
1929             p++;
1930         }
1931     }
1932     return 0;
1933 }
1934
1935 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1936 {
1937 #ifdef INPUT_CODE_FIX
1938     if (f || !input_f)
1939 #endif
1940         if (estab_f != f){
1941             estab_f = f;
1942         }
1943
1944     if (iconv_func
1945 #ifdef INPUT_CODE_FIX
1946         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1947 #endif
1948         ){
1949         iconv = iconv_func;
1950     }
1951 #ifdef CHECK_OPTION
1952     if (estab_f && iconv_for_check != iconv){
1953         struct input_code *p = find_inputcode_byfunc(iconv);
1954         if (p){
1955             set_input_codename(p->name);
1956             debug(p->name);
1957         }
1958         iconv_for_check = iconv;
1959     }
1960 #endif
1961 }
1962
/*
 * Bit flags OR-ed into input_code.score by set_code_score().
 * Each flag occupies its own bit, starting from bit 0.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* machine-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* non-existent character */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score lookup tables, indexed by the low nibble of the first byte
 * (c2 & 0x0f) in code_score(): score_table_A0 when (c2 & 0x70) == 0x20,
 * score_table_F0 when (c2 & 0x70) == 0x70.
 *
 * The element type is unsigned char, not plain char: SCORE_ERROR is 128
 * and does not fit in a signed char.  Stored in a signed char it becomes
 * negative and sign-extends when widened for set_code_score(), turning
 * on every higher bit of the score as well.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1987
1988 void set_code_score(struct input_code *ptr, nkf_char score)
1989 {
1990     if (ptr){
1991         ptr->score |= score;
1992     }
1993 }
1994
1995 void clr_code_score(struct input_code *ptr, nkf_char score)
1996 {
1997     if (ptr){
1998         ptr->score &= ~score;
1999     }
2000 }
2001
2002 void code_score(struct input_code *ptr)
2003 {
2004     nkf_char c2 = ptr->buf[0];
2005 #ifdef UTF8_OUTPUT_ENABLE
2006     nkf_char c1 = ptr->buf[1];
2007 #endif
2008     if (c2 < 0){
2009         set_code_score(ptr, SCORE_ERROR);
2010     }else if (c2 == SSO){
2011         set_code_score(ptr, SCORE_KANA);
2012     }else if (c2 == 0x8f){
2013         set_code_score(ptr, SCORE_X0212);
2014 #ifdef UTF8_OUTPUT_ENABLE
2015     }else if (!e2w_conv(c2, c1)){
2016         set_code_score(ptr, SCORE_NO_EXIST);
2017 #endif
2018     }else if ((c2 & 0x70) == 0x20){
2019         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2020     }else if ((c2 & 0x70) == 0x70){
2021         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2022     }else if ((c2 & 0x70) >= 0x50){
2023         set_code_score(ptr, SCORE_L2);
2024     }
2025 }
2026
2027 void status_disable(struct input_code *ptr)
2028 {
2029     ptr->stat = -1;
2030     ptr->buf[0] = -1;
2031     code_score(ptr);
2032     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2033 }
2034
2035 void status_push_ch(struct input_code *ptr, nkf_char c)
2036 {
2037     ptr->buf[ptr->index++] = c;
2038 }
2039
2040 void status_clear(struct input_code *ptr)
2041 {
2042     ptr->stat = 0;
2043     ptr->index = 0;
2044 }
2045
2046 void status_reset(struct input_code *ptr)
2047 {
2048     status_clear(ptr);
2049     ptr->score = SCORE_INIT;
2050 }
2051
2052 void status_reinit(struct input_code *ptr)
2053 {
2054     status_reset(ptr);
2055     ptr->_file_stat = 0;
2056 }
2057
2058 void status_check(struct input_code *ptr, nkf_char c)
2059 {
2060     if (c <= DEL && estab_f){
2061         status_reset(ptr);
2062     }
2063 }
2064
2065 void s_status(struct input_code *ptr, nkf_char c)
2066 {
2067     switch(ptr->stat){
2068       case -1:
2069           status_check(ptr, c);
2070           break;
2071       case 0:
2072           if (c <= DEL){
2073               break;
2074 #ifdef NUMCHAR_OPTION
2075           }else if (is_unicode_capsule(c)){
2076               break;
2077 #endif
2078           }else if (0xa1 <= c && c <= 0xdf){
2079               status_push_ch(ptr, SSO);
2080               status_push_ch(ptr, c);
2081               code_score(ptr);
2082               status_clear(ptr);
2083           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2084               ptr->stat = 1;
2085               status_push_ch(ptr, c);
2086           }else if (0xed <= c && c <= 0xee){
2087               ptr->stat = 3;
2088               status_push_ch(ptr, c);
2089 #ifdef SHIFTJIS_CP932
2090           }else if (is_ibmext_in_sjis(c)){
2091               ptr->stat = 2;
2092               status_push_ch(ptr, c);
2093 #endif /* SHIFTJIS_CP932 */
2094 #ifdef X0212_ENABLE
2095           }else if (0xf0 <= c && c <= 0xfc){
2096               ptr->stat = 1;
2097               status_push_ch(ptr, c);
2098 #endif /* X0212_ENABLE */
2099           }else{
2100               status_disable(ptr);
2101           }
2102           break;
2103       case 1:
2104           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2105               status_push_ch(ptr, c);
2106               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2107               code_score(ptr);
2108               status_clear(ptr);
2109           }else{
2110               status_disable(ptr);
2111           }
2112           break;
2113       case 2:
2114 #ifdef SHIFTJIS_CP932
2115         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2116             status_push_ch(ptr, c);
2117             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2118                 set_code_score(ptr, SCORE_CP932);
2119                 status_clear(ptr);
2120                 break;
2121             }
2122         }
2123 #endif /* SHIFTJIS_CP932 */
2124         status_disable(ptr);
2125           break;
2126       case 3:
2127           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2128               status_push_ch(ptr, c);
2129               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2130             set_code_score(ptr, SCORE_CP932);
2131             status_clear(ptr);
2132           }else{
2133               status_disable(ptr);
2134           }
2135           break;
2136     }
2137 }
2138
2139 void e_status(struct input_code *ptr, nkf_char c)
2140 {
2141     switch (ptr->stat){
2142       case -1:
2143           status_check(ptr, c);
2144           break;
2145       case 0:
2146           if (c <= DEL){
2147               break;
2148 #ifdef NUMCHAR_OPTION
2149           }else if (is_unicode_capsule(c)){
2150               break;
2151 #endif
2152           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2153               ptr->stat = 1;
2154               status_push_ch(ptr, c);
2155 #ifdef X0212_ENABLE
2156           }else if (0x8f == c){
2157               ptr->stat = 2;
2158               status_push_ch(ptr, c);
2159 #endif /* X0212_ENABLE */
2160           }else{
2161               status_disable(ptr);
2162           }
2163           break;
2164       case 1:
2165           if (0xa1 <= c && c <= 0xfe){
2166               status_push_ch(ptr, c);
2167               code_score(ptr);
2168               status_clear(ptr);
2169           }else{
2170               status_disable(ptr);
2171           }
2172           break;
2173 #ifdef X0212_ENABLE
2174       case 2:
2175           if (0xa1 <= c && c <= 0xfe){
2176               ptr->stat = 1;
2177               status_push_ch(ptr, c);
2178           }else{
2179               status_disable(ptr);
2180           }
2181 #endif /* X0212_ENABLE */
2182     }
2183 }
2184
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: advance the state machine in ptr by one input byte c.
 *
 * ptr->stat:
 *   -1  candidate rejected (see status_disable)
 *    0  between characters
 *    1  lead byte 0xC0-0xDF stored, 1 continuation byte expected
 *    2  lead byte 0xE0-0xEF stored, 2 continuation bytes expected
 *    3  lead byte 0xF0-0xF4 stored, 3 continuation bytes expected;
 *       these sequences are accepted but not scored
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    int is_continuation = (0x80 <= c && c <= 0xbf);
    int is_bom;

    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL)
              break;
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c))
              break;
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (!is_continuation){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index <= ptr->stat)
              break;              /* more continuation bytes to come */

          /* sequence complete: convert it and score it, except for EF BB BF */
          is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                    && ptr->buf[2] == 0xbf);
          w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                   &ptr->buf[0], &ptr->buf[1]);
          if (!is_bom){
              code_score(ptr);
          }
          status_clear(ptr);
          break;

      case 3:
          if (!is_continuation){
              status_disable(ptr);
          }else if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              /* last continuation byte of the four-byte sequence */
              status_clear(ptr);
          }
          break;
    }
}
#endif
2244
2245 void code_status(nkf_char c)
2246 {
2247     int action_flag = 1;
2248     struct input_code *result = 0;
2249     struct input_code *p = input_code_list;
2250     while (p->name){
2251         if (!p->status_func) {
2252             ++p;
2253             continue;
2254         }
2255         if (!p->status_func)
2256             continue;
2257         (p->status_func)(p, c);
2258         if (p->stat > 0){
2259             action_flag = 0;
2260         }else if(p->stat == 0){
2261             if (result){
2262                 action_flag = 0;
2263             }else{
2264                 result = p;
2265             }
2266         }
2267         ++p;
2268     }
2269
2270     if (action_flag){
2271         if (result && !estab_f){
2272             set_iconv(TRUE, result->iconv_func);
2273         }else if (c <= DEL){
2274             struct input_code *ptr = input_code_list;
2275             while (ptr->name){
2276                 status_reset(ptr);
2277                 ++ptr;
2278             }
2279         }
2280     }
2281 }
2282
2283 #ifndef WIN32DLL
2284 nkf_char std_getc(FILE *f)
2285 {
2286     if (std_gc_ndx){
2287         return std_gc_buf[--std_gc_ndx];
2288     }
2289     return getc(f);
2290 }
2291 #endif /*WIN32DLL*/
2292
2293 nkf_char std_ungetc(nkf_char c, FILE *f)
2294 {
2295     if (std_gc_ndx == STD_GC_BUFSIZE){
2296         return EOF;
2297     }
2298     std_gc_buf[std_gc_ndx++] = c;
2299     return c;
2300 }
2301
2302 #ifndef WIN32DLL
2303 void std_putc(nkf_char c)
2304 {
2305     if(c!=EOF)
2306       putchar(c);
2307 }
2308 #endif /*WIN32DLL*/
2309
2310 #if !defined(PERL_XS) && !defined(WIN32DLL)
2311 nkf_char noconvert(FILE *f)
2312 {
2313     nkf_char    c;
2314
2315     if (nop_f == 2)
2316         module_connection();
2317     while ((c = (*i_getc)(f)) != EOF)
2318       (*o_putc)(c);
2319     (*o_putc)(EOF);
2320     return 1;
2321 }
2322 #endif
2323
2324 void module_connection(void)
2325 {
2326     oconv = output_conv;
2327     o_putc = std_putc;
2328
2329     /* replace continucation module, from output side */
2330
2331     /* output redicrection */
2332 #ifdef CHECK_OPTION
2333     if (noout_f || guess_f){
2334         o_putc = no_putc;
2335     }
2336 #endif
2337     if (mimeout_f) {
2338         o_mputc = o_putc;
2339         o_putc = mime_putc;
2340         if (mimeout_f == TRUE) {
2341             o_base64conv = oconv; oconv = base64_conv;
2342         }
2343         /* base64_count = 0; */
2344     }
2345
2346     if (nlmode_f || guess_f) {
2347         o_nlconv = oconv; oconv = nl_conv;
2348     }
2349     if (rot_f) {
2350         o_rot_conv = oconv; oconv = rot_conv;
2351     }
2352     if (iso2022jp_f) {
2353         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2354     }
2355     if (hira_f) {
2356         o_hira_conv = oconv; oconv = hira_conv;
2357     }
2358     if (fold_f) {
2359         o_fconv = oconv; oconv = fold_conv;
2360         f_line = 0;
2361     }
2362     if (alpha_f || x0201_f) {
2363         o_zconv = oconv; oconv = z_conv;
2364     }
2365
2366     i_getc = std_getc;
2367     i_ungetc = std_ungetc;
2368     /* input redicrection */
2369 #ifdef INPUT_OPTION
2370     if (cap_f){
2371         i_cgetc = i_getc; i_getc = cap_getc;
2372         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2373     }
2374     if (url_f){
2375         i_ugetc = i_getc; i_getc = url_getc;
2376         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2377     }
2378 #endif
2379 #ifdef NUMCHAR_OPTION
2380     if (numchar_f){
2381         i_ngetc = i_getc; i_getc = numchar_getc;
2382         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2383     }
2384 #endif
2385 #ifdef UNICODE_NORMALIZATION
2386     if (nfc_f && input_f == UTF8_INPUT){
2387         i_nfc_getc = i_getc; i_getc = nfc_getc;
2388         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2389     }
2390 #endif
2391     if (mime_f && mimebuf_f==FIXED_MIME) {
2392         i_mgetc = i_getc; i_getc = mime_getc;
2393         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2394     }
2395     if (broken_f & 1) {
2396         i_bgetc = i_getc; i_getc = broken_getc;
2397         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2398     }
2399     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2400         set_iconv(-TRUE, e_iconv);
2401     } else if (input_f == SJIS_INPUT) {
2402         set_iconv(-TRUE, s_iconv);
2403 #ifdef UTF8_INPUT_ENABLE
2404     } else if (input_f == UTF8_INPUT) {
2405         set_iconv(-TRUE, w_iconv);
2406     } else if (input_f == UTF16_INPUT) {
2407         set_iconv(-TRUE, w_iconv16);
2408     } else if (input_f == UTF32_INPUT) {
2409         set_iconv(-TRUE, w_iconv32);
2410 #endif
2411     } else {
2412         set_iconv(FALSE, e_iconv);
2413     }
2414
2415     {
2416         struct input_code *p = input_code_list;
2417         while (p->name){
2418             status_reinit(p++);
2419         }
2420     }
2421 }
2422
2423 /*
2424  * Check and Ignore BOM
2425  */
2426 void check_bom(FILE *f)
2427 {
2428     int c2;
2429     switch(c2 = (*i_getc)(f)){
2430     case 0x00:
2431         if((c2 = (*i_getc)(f)) == 0x00){
2432             if((c2 = (*i_getc)(f)) == 0xFE){
2433                 if((c2 = (*i_getc)(f)) == 0xFF){
2434                     if(!input_f){
2435                         set_iconv(TRUE, w_iconv32);
2436                     }
2437                     if (iconv == w_iconv32) {
2438                         input_endian = ENDIAN_BIG;
2439                         return;
2440                     }
2441                     (*i_ungetc)(0xFF,f);
2442                 }else (*i_ungetc)(c2,f);
2443                 (*i_ungetc)(0xFE,f);
2444             }else if(c2 == 0xFF){
2445                 if((c2 = (*i_getc)(f)) == 0xFE){
2446                     if(!input_f){
2447                         set_iconv(TRUE, w_iconv32);
2448                     }
2449                     if (iconv == w_iconv32) {
2450                         input_endian = ENDIAN_2143;
2451                         return;
2452                     }
2453                     (*i_ungetc)(0xFF,f);
2454                 }else (*i_ungetc)(c2,f);
2455                 (*i_ungetc)(0xFF,f);
2456             }else (*i_ungetc)(c2,f);
2457             (*i_ungetc)(0x00,f);
2458         }else (*i_ungetc)(c2,f);
2459         (*i_ungetc)(0x00,f);
2460         break;
2461     case 0xEF:
2462         if((c2 = (*i_getc)(f)) == 0xBB){
2463             if((c2 = (*i_getc)(f)) == 0xBF){
2464                 if(!input_f){
2465                     set_iconv(TRUE, w_iconv);
2466                 }
2467                 if (iconv == w_iconv) {
2468                     return;
2469                 }
2470                 (*i_ungetc)(0xBF,f);
2471             }else (*i_ungetc)(c2,f);
2472             (*i_ungetc)(0xBB,f);
2473         }else (*i_ungetc)(c2,f);
2474         (*i_ungetc)(0xEF,f);
2475         break;
2476     case 0xFE:
2477         if((c2 = (*i_getc)(f)) == 0xFF){
2478             if((c2 = (*i_getc)(f)) == 0x00){
2479                 if((c2 = (*i_getc)(f)) == 0x00){
2480                     if(!input_f){
2481                         set_iconv(TRUE, w_iconv32);
2482                     }
2483                     if (iconv == w_iconv32) {
2484                         input_endian = ENDIAN_3412;
2485                         return;
2486                     }
2487                     (*i_ungetc)(0x00,f);
2488                 }else (*i_ungetc)(c2,f);
2489                 (*i_ungetc)(0x00,f);
2490             }else (*i_ungetc)(c2,f);
2491             if(!input_f){
2492                 set_iconv(TRUE, w_iconv16);
2493             }
2494             if (iconv == w_iconv16) {
2495                 input_endian = ENDIAN_BIG;
2496                 return;
2497             }
2498             (*i_ungetc)(0xFF,f);
2499         }else (*i_ungetc)(c2,f);
2500         (*i_ungetc)(0xFE,f);
2501         break;
2502     case 0xFF:
2503         if((c2 = (*i_getc)(f)) == 0xFE){
2504             if((c2 = (*i_getc)(f)) == 0x00){
2505                 if((c2 = (*i_getc)(f)) == 0x00){
2506                     if(!input_f){
2507                         set_iconv(TRUE, w_iconv32);
2508                     }
2509                     if (iconv == w_iconv32) {
2510                         input_endian = ENDIAN_LITTLE;
2511                         return;
2512                     }
2513                     (*i_ungetc)(0x00,f);
2514                 }else (*i_ungetc)(c2,f);
2515                 (*i_ungetc)(0x00,f);
2516             }else (*i_ungetc)(c2,f);
2517             if(!input_f){
2518                 set_iconv(TRUE, w_iconv16);
2519             }
2520             if (iconv == w_iconv16) {
2521                 input_endian = ENDIAN_LITTLE;
2522                 return;
2523             }
2524             (*i_ungetc)(0xFE,f);
2525         }else (*i_ungetc)(c2,f);
2526         (*i_ungetc)(0xFF,f);
2527         break;
2528     default:
2529         (*i_ungetc)(c2,f);
2530         break;
2531     }
2532 }
2533
2534 /*
2535    Conversion main loop. Code detection only.
2536  */
2537
/*
 * kanji_convert: main input loop.
 *
 * Reads the stream f one byte at a time through (*i_getc), tracks the
 * shift / escape-sequence state in input_mode and shift_mode, and hands each
 * completed character either to (*iconv) (while input_mode is ASCII) or
 * directly to (*oconv) (for the JIS designations).
 *
 * Local variables:
 *   c2 - pending first byte of a two-byte character, 0 when none is pending
 *   c1 - byte currently being examined
 *   c0 - extra data for (*iconv): a third byte, or two bytes packed as
 *        (high << 8) | low
 *   c3 - scratch byte
 *
 * Flow-control macros defined inside the function:
 *   NEXT - `continue`: nothing is emitted, read the next byte
 *   SEND - empty statement: leave the if/else chain and fall into the
 *          switch(input_mode) at the bottom of the loop body
 *   LAST - `break`: leave the loop and run the epilogue
 *
 * Returns 1 after the epilogue. The only other return is the `return 0`
 * inside the X0208 / X0213_1 case of the final switch.
 */
2538 nkf_char kanji_convert(FILE *f)
2539 {
2540     nkf_char    c3, c2=0, c1, c0=0;
2541     int is_8bit = FALSE;
2542
     /* An explicitly requested 8-bit input encoding sets is_8bit, which
        disables the SI / SO / ESC handling further down unless
        mime_decode_mode is on. */
2543     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2544 #ifdef UTF8_INPUT_ENABLE
2545        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2546 #endif
2547       ){
2548         is_8bit = TRUE;
2549     }
2550
2551     input_mode = ASCII;
2552     output_mode = ASCII;
2553     shift_mode = FALSE;
2554
2555 #define NEXT continue      /* no output, get next */
2556 #define SEND ;             /* output c1 and c2, get next */
2557 #define LAST break         /* end of loop, go closing  */
2558
2559     module_connection();
2560     check_bom(f);
2561
2562     while ((c1 = (*i_getc)(f)) != EOF) {
2563 #ifdef INPUT_CODE_FIX
2564         if (!input_f)
2565 #endif
2566             code_status(c1);
2567         if (c2) {
2568             /* second byte */
2569             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2570                 /* in case of 8th bit is on */
2571                 if (!estab_f&&!mime_decode_mode) {
2572                     /* in case of not established yet */
2573                     /* It is still ambiguous */
2574                     if (h_conv(f, c2, c1)==EOF)
2575                         LAST;
2576                     else
2577                         c2 = 0;
2578                     NEXT;
2579                 } else {
2580                     /* in case of already established */
2581                     if (c1 < AT) {
2582                         /* ignore bogus code and not CP5022x UCD */
2583                         c2 = 0;
2584                         NEXT;
2585                     } else {
2586                         SEND;
2587                     }
2588                 }
2589             } else
2590                 /* second byte, 7 bit code */
2591                 /* it might be kanji shifted */
2592                 if ((c1 == DEL) || (c1 <= SP)) {
2593                     /* ignore bogus first code */
2594                     c2 = 0;
2595                     NEXT;
2596                 } else
2597                     SEND;
2598         } else {
2599             /* first byte */
2600 #ifdef UTF8_INPUT_ENABLE
2601             if (iconv == w_iconv16) {
                 /* UTF-16: build one code unit with c2 = high byte and
                    c1 = low byte. When the high byte is 0xD8-0xDB the next
                    two bytes are read as well and packed into c0 as
                    (high << 8) | low. Hitting EOF part-way sets c2 = EOF. */
2602                 if (input_endian == ENDIAN_BIG) {
2603                     c2 = c1;
2604                     if ((c1 = (*i_getc)(f)) != EOF) {
2605                         if (0xD8 <= c2 && c2 <= 0xDB) {
2606                             if ((c0 = (*i_getc)(f)) != EOF) {
2607                                 c0 <<= 8;
2608                                 if ((c3 = (*i_getc)(f)) != EOF) {
2609                                     c0 |= c3;
2610                                 } else c2 = EOF;
2611                             } else c2 = EOF;
2612                         }
2613                     } else c2 = EOF;
2614                 } else {
2615                     if ((c2 = (*i_getc)(f)) != EOF) {
2616                         if (0xD8 <= c2 && c2 <= 0xDB) {
2617                             if ((c3 = (*i_getc)(f)) != EOF) {
2618                                 if ((c0 = (*i_getc)(f)) != EOF) {
2619                                     c0 <<= 8;
2620                                     c0 |= c3;
2621                                 } else c2 = EOF;
2622                             } else c2 = EOF;
2623                         }
2624                     } else c2 = EOF;
2625                 }
2626                 SEND;
2627             } else if(iconv == w_iconv32){
                 /* UTF-32: this block-local c3 (an int that shadows the
                    outer c3) keeps the first byte; three more bytes are read
                    into c2, c1 and c0 and combined into c1 according to
                    input_endian. c2 becomes 0 on success and EOF when the
                    input ends inside the unit. */
2628                 int c3 = c1;
2629                 if((c2 = (*i_getc)(f)) != EOF &&
2630                    (c1 = (*i_getc)(f)) != EOF &&
2631                    (c0 = (*i_getc)(f)) != EOF){
2632                     switch(input_endian){
2633                     case ENDIAN_BIG:
2634                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2635                         break;
2636                     case ENDIAN_LITTLE:
2637                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2638                         break;
2639                     case ENDIAN_2143:
2640                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2641                         break;
2642                     case ENDIAN_3412:
2643                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2644                         break;
2645                     }
2646                     c2 = 0;
2647                 }else{
2648                     c2 = EOF;
2649                 }
2650                 SEND;
2651             } else
2652 #endif
2653 #ifdef NUMCHAR_OPTION
2654             if (is_unicode_capsule(c1)){
2655                 SEND;
2656             } else
2657 #endif
2658             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2659                 /* 8 bit code */
2660                 if (!estab_f && !iso8859_f) {
2661                     /* not established yet */
2662                     c2 = c1;
2663                     NEXT;
2664                 } else { /* estab_f==TRUE */
2665                     if (iso8859_f) {
2666                         c2 = ISO8859_1;
2667                         c1 &= 0x7f;
2668                         SEND;
2669                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2670                         /* SJIS X0201 Case... */
2671                         if(iso2022jp_f && x0201_f==NO_X0201) {
2672                             (*oconv)(GETA1, GETA2);
2673                             NEXT;
2674                         } else {
2675                             c2 = X0201;
2676                             c1 &= 0x7f;
2677                             SEND;
2678                         }
2679                     } else if (c1==SSO && iconv != s_iconv) {
2680                         /* EUC X0201 Case */
2681                         c1 = (*i_getc)(f);  /* skip SSO */
2682                         code_status(c1);
2683                         if (SSP<=c1 && c1<0xe0) {
2684                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2685                                 (*oconv)(GETA1, GETA2);
2686                                 NEXT;
2687                             } else {
2688                                 c2 = X0201;
2689                                 c1 &= 0x7f;
2690                                 SEND;
2691                             }
2692                         } else  { /* bogus code, skip SSO and one byte */
2693                             NEXT;
2694                         }
2695                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2696                                (c1 == 0xFD || c1 == 0xFE)) {
2697                         /* CP10001 */
2698                         c2 = X0201;
2699                         c1 &= 0x7f;
2700                         SEND;
2701                     } else {
2702                        /* already established */
2703                        c2 = c1;
2704                        NEXT;
2705                     }
2706                 }
2707             } else if ((c1 > SP) && (c1 != DEL)) {
2708                 /* in case of Roman characters */
2709                 if (shift_mode) {
2710                     /* output 1 shifted byte */
2711                     if (iso8859_f) {
2712                         c2 = ISO8859_1;
2713                         SEND;
2714                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2715                       /* output 1 shifted byte */
2716                         if(iso2022jp_f && x0201_f==NO_X0201) {
2717                             (*oconv)(GETA1, GETA2);
2718                             NEXT;
2719                         } else {
2720                             c2 = X0201;
2721                             SEND;
2722                         }
2723                     } else {
2724                         /* look like bogus code */
2725                         NEXT;
2726                     }
2727                 } else if (input_mode == X0208 || input_mode == X0212 ||
2728                            input_mode == X0213_1 || input_mode == X0213_2) {
2729                     /* in case of Kanji shifted */
2730                     c2 = c1;
2731                     NEXT;
2732                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2733                     /* Check MIME code */
2734                     if ((c1 = (*i_getc)(f)) == EOF) {
2735                         (*oconv)(0, '=');
2736                         LAST;
2737                     } else if (c1 == '?') {
2738                         /* =? is mime conversion start sequence */
2739                         if(mime_f == STRICT_MIME) {
2740                             /* check in real detail */
2741                             if (mime_begin_strict(f) == EOF)
2742                                 LAST;
2743                             else
2744                                 NEXT;
2745                         } else if (mime_begin(f) == EOF)
2746                             LAST;
2747                         else
2748                             NEXT;
2749                     } else {
2750                         (*oconv)(0, '=');
2751                         (*i_ungetc)(c1,f);
2752                         NEXT;
2753                     }
2754                 } else {
2755                     /* normal ASCII code */
2756                     SEND;
2757                 }
2758             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2759                 shift_mode = FALSE;
2760                 NEXT;
2761             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2762                 shift_mode = TRUE;
2763                 NEXT;
2764             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                 /* Escape sequences: recognised designations only update
                    input_mode / shift_mode and emit nothing; unrecognised
                    ones are passed on to (*oconv) byte by byte. */
2765                 if ((c1 = (*i_getc)(f)) == EOF) {
2766                     /*  (*oconv)(0, ESC); don't send bogus code */
2767                     LAST;
2768                 } else if (c1 == '$') {
2769                     if ((c1 = (*i_getc)(f)) == EOF) {
2770                         /*
2771                         (*oconv)(0, ESC); don't send bogus code
2772                         (*oconv)(0, '$'); */
2773                         LAST;
2774                     } else if (c1 == '@'|| c1 == 'B') {
2775                         /* This is kanji introduction */
2776                         input_mode = X0208;
2777                         shift_mode = FALSE;
2778                         set_input_codename("ISO-2022-JP");
2779 #ifdef CHECK_OPTION
2780                         debug("ISO-2022-JP");
2781 #endif
2782                         NEXT;
2783                     } else if (c1 == '(') {
2784                         if ((c1 = (*i_getc)(f)) == EOF) {
2785                             /* don't send bogus code
2786                             (*oconv)(0, ESC);
2787                             (*oconv)(0, '$');
2788                             (*oconv)(0, '(');
2789                                 */
2790                             LAST;
2791                         } else if (c1 == '@'|| c1 == 'B') {
2792                             /* This is kanji introduction */
2793                             input_mode = X0208;
2794                             shift_mode = FALSE;
2795                             NEXT;
2796 #ifdef X0212_ENABLE
2797                         } else if (c1 == 'D'){
2798                             input_mode = X0212;
2799                             shift_mode = FALSE;
2800                             NEXT;
2801 #endif /* X0212_ENABLE */
2802                         } else if (c1 == (X0213_1&0x7F)){
2803                             input_mode = X0213_1;
2804                             shift_mode = FALSE;
2805                             NEXT;
2806                         } else if (c1 == (X0213_2&0x7F)){
2807                             input_mode = X0213_2;
2808                             shift_mode = FALSE;
2809                             NEXT;
2810                         } else {
2811                             /* could be some special code */
2812                             (*oconv)(0, ESC);
2813                             (*oconv)(0, '$');
2814                             (*oconv)(0, '(');
2815                             (*oconv)(0, c1);
2816                             NEXT;
2817                         }
2818                     } else if (broken_f&0x2) {
2819                         /* accept any ESC-(-x as broken code ... */
2820                         input_mode = X0208;
2821                         shift_mode = FALSE;
2822                         NEXT;
2823                     } else {
2824                         (*oconv)(0, ESC);
2825                         (*oconv)(0, '$');
2826                         (*oconv)(0, c1);
2827                         NEXT;
2828                     }
2829                 } else if (c1 == '(') {
2830                     if ((c1 = (*i_getc)(f)) == EOF) {
2831                         /* don't send bogus code
2832                         (*oconv)(0, ESC);
2833                         (*oconv)(0, '('); */
2834                         LAST;
2835                     } else {
2836                         if (c1 == 'I') {
2837                             /* This is X0201 kana introduction */
2838                             input_mode = X0201; shift_mode = X0201;
2839                             NEXT;
2840                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2841                             /* ESC ( B / J / H: input_mode goes back to ASCII */
2842                             input_mode = ASCII; shift_mode = FALSE;
2843                             NEXT;
2844                         } else if (broken_f&0x2) {
2845                             input_mode = ASCII; shift_mode = FALSE;
2846                             NEXT;
2847                         } else {
2848                             (*oconv)(0, ESC);
2849                             (*oconv)(0, '(');
2850                             /* maintain various input_mode here */
2851                             SEND;
2852                         }
2853                     }
2854                } else if ( c1 == 'N' || c1 == 'n'){
2855                    /* SS2 */
                    /* NOTE(review): c3 is not compared with EOF before
                       the range test; on EOF it reaches (*i_ungetc) below. */
2856                    c3 = (*i_getc)(f);  /* skip SS2 */
2857                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2858                        c1 = c3;
2859                        c2 = X0201;
2860                        SEND;
2861                    }else{
2862                        (*i_ungetc)(c3, f);
2863                        /* lonely ESC  */
2864                        (*oconv)(0, ESC);
2865                        SEND;
2866                    }
2867                 } else {
2868                     /* lonely ESC  */
2869                     (*oconv)(0, ESC);
2870                     SEND;
2871                 }
2872             } else if (c1 == ESC && iconv == s_iconv) {
2873                 /* ESC in Shift_JIS */
2874                 if ((c1 = (*i_getc)(f)) == EOF) {
2875                     /*  (*oconv)(0, ESC); don't send bogus code */
2876                     LAST;
2877                 } else if (c1 == '$') {
2878                     /* J-PHONE emoji */
2879                     if ((c1 = (*i_getc)(f)) == EOF) {
2880                         /*
2881                            (*oconv)(0, ESC); don't send bogus code
2882                            (*oconv)(0, '$'); */
2883                         LAST;
2884                     } else {
2885                         if (('E' <= c1 && c1 <= 'G') ||
2886                             ('O' <= c1 && c1 <= 'Q')) {
2887                             /*
2888                                NUM : 0 1 2 3 4 5
2889                                BYTE: G E F O P Q
2890                                C%7 : 1 6 0 2 3 4
2891                                C%7 : 0 1 2 3 4 5 6
2892                                NUM : 2 0 3 4 5 X 1
2893                              */
2894                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                             /* c0 is used here as the base value added to
                                every following byte in SP..'z'; the inner
                                loop stops at the first byte outside that
                                range or at EOF. */
2895                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2896                             while ((c1 = (*i_getc)(f)) != EOF) {
2897                                 if (SP <= c1 && c1 <= 'z') {
2898                                     (*oconv)(0, c1 + c0);
2899                                 } else break; /* c1 == SO */
2900                             }
2901                         }
2902                     }
2903                     if (c1 == EOF) LAST;
2904                     NEXT;
2905                 } else {
2906                     /* lonely ESC  */
2907                     (*oconv)(0, ESC);
2908                     SEND;
2909                 }
2910             } else if (c1 == LF || c1 == CR) {
2911                 if (broken_f&4) {
2912                     input_mode = ASCII; set_iconv(FALSE, 0);
2913                     SEND;
2914                 } else if (mime_decode_f && !mime_decode_mode){
                     /* A line break directly followed by SP is dropped:
                        the SP is pushed back and the loop continues without
                        emitting the LF / CR / CR LF. */
2915                     if (c1 == LF) {
2916                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2917                             i_ungetc(SP,f);
2918                             continue;
2919                         } else {
2920                             i_ungetc(c1,f);
2921                         }
2922                         c1 = LF;
2923                         SEND;
2924                     } else  { /* if (c1 == CR)*/
                         /* NOTE(review): when the byte after CR is neither
                            SP nor LF, that byte is pushed back and LF is
                            pushed back after it, so an LF is read right
                            after this CR - confirm this is intended. */
2925                         if ((c1=(*i_getc)(f))!=EOF) {
2926                             if (c1==SP) {
2927                                 i_ungetc(SP,f);
2928                                 continue;
2929                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2930                                 i_ungetc(SP,f);
2931                                 continue;
2932                             } else {
2933                                 i_ungetc(c1,f);
2934                             }
2935                             i_ungetc(LF,f);
2936                         } else {
2937                             i_ungetc(c1,f);
2938                         }
2939                         c1 = CR;
2940                         SEND;
2941                     }
2942                 }
2943             } else if (c1 == DEL && input_mode == X0208) {
2944                 /* CP5022x */
2945                 c2 = c1;
2946                 NEXT;
2947             } else
2948                 SEND;
2949         }
2950         /* send: */
         /* Every SEND above arrives here with the character in c2 / c1 / c0. */
2951         switch(input_mode){
2952         case ASCII:
             /* (*iconv) returns -1 / -2 to ask for one / two more bytes;
                they are read here and the call is repeated with c0 filled
                in (two bytes are packed as (first << 8) | second). */
2953             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2954             case -2:
2955                 /* 4 bytes UTF-8 */
2956                 if ((c0 = (*i_getc)(f)) != EOF) {
2957                     code_status(c0);
2958                     c0 <<= 8;
2959                     if ((c3 = (*i_getc)(f)) != EOF) {
2960                         code_status(c3);
2961                         (*iconv)(c2, c1, c0|c3);
2962                     }
2963                 }
2964                 break;
2965             case -1:
2966                 /* 3 bytes EUC or UTF-8 */
2967                 if ((c0 = (*i_getc)(f)) != EOF) {
2968                     code_status(c0);
2969                     (*iconv)(c2, c1, c0);
2970                 }
2971                 break;
2972             }
2973             break;
2974         case X0208:
2975         case X0213_1:
2976             if (ms_ucs_map_f &&
2977                 0x7F <= c2 && c2 <= 0x92 &&
2978                 0x21 <= c1 && c1 <= 0x7E) {
2979                 /* CP932 UDC */
                 /* NOTE(review): c1 <= 0x7E was just checked, so the
                    c1 == 0x7F test below cannot be true here. */
2980                 if(c1 == 0x7F) return 0;
2981                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2982                 c2 = 0;
2983             }
2984             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2985             break;
2986 #ifdef X0212_ENABLE
2987         case X0212:
2988             (*oconv)(PREFIX_EUCG3 | c2, c1);
2989             break;
2990 #endif /* X0212_ENABLE */
2991         case X0213_2:
2992             (*oconv)(PREFIX_EUCG3 | c2, c1);
2993             break;
2994         default:
2995             (*oconv)(input_mode, c1);  /* other special case */
2996         }
2997
2998         c2 = 0;
2999         c0 = 0;
3000         continue;
3001         /* goto next_word */
3002     }
3003
3004     /* epilogue */
     /* (*iconv) is called once more with c2 == EOF. If no input code name
        has been recorded and an 8-bit input encoding was requested, the
        input_code_list entry with the lowest score is recorded as the
        input code name. */
3005     (*iconv)(EOF, 0, 0);
3006     if (!input_codename)
3007     {
3008         if (is_8bit) {
3009             struct input_code *p = input_code_list;
3010             struct input_code *result = p;
3011             while (p->name){
3012                 if (p->score < result->score) result = p;
3013                 ++p;
3014             }
3015             set_input_codename(result->name);
3016 #ifdef CHECK_OPTION
3017             debug(result->name);
3018 #endif
3019         }
3020     }
3021     return 1;
3022 }
3023
3024 nkf_char
3025 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3026 {
3027     nkf_char ret, c3, c0;
3028     int hold_index;
3029
3030
3031     /** it must NOT be in the kanji shifte sequence      */
3032     /** it must NOT be written in JIS7                   */
3033     /** and it must be after 2 byte 8bit code            */
3034
3035     hold_count = 0;
3036     push_hold_buf(c2);
3037     push_hold_buf(c1);
3038
3039     while ((c1 = (*i_getc)(f)) != EOF) {
3040         if (c1 == ESC){
3041             (*i_ungetc)(c1,f);
3042             break;
3043         }
3044         code_status(c1);
3045         if (push_hold_buf(c1) == EOF || estab_f){
3046             break;
3047         }
3048     }
3049
3050     if (!estab_f){
3051         struct input_code *p = input_code_list;
3052         struct input_code *result = p;
3053         if (c1 == EOF){
3054             code_status(c1);
3055         }
3056         while (p->name){
3057             if (p->status_func && p->score < result->score){
3058                 result = p;
3059             }
3060             ++p;
3061         }
3062         set_iconv(TRUE, result->iconv_func);
3063     }
3064
3065
3066     /** now,
3067      ** 1) EOF is detected, or
3068      ** 2) Code is established, or
3069      ** 3) Buffer is FULL (but last word is pushed)
3070      **
3071      ** in 1) and 3) cases, we continue to use
3072      ** Kanji codes by oconv and leave estab_f unchanged.
3073      **/
3074
3075     ret = c1;
3076     hold_index = 0;
3077     while (hold_index < hold_count){
3078         c2 = hold_buf[hold_index++];
3079         if (c2 <= DEL
3080 #ifdef NUMCHAR_OPTION
3081             || is_unicode_capsule(c2)
3082 #endif
3083             ){
3084             (*iconv)(0, c2, 0);
3085             continue;
3086         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3087             (*iconv)(X0201, c2, 0);
3088             continue;
3089         }
3090         if (hold_index < hold_count){
3091             c1 = hold_buf[hold_index++];
3092         }else{
3093             c1 = (*i_getc)(f);
3094             if (c1 == EOF){
3095                 c3 = EOF;
3096                 break;
3097             }
3098             code_status(c1);
3099         }
3100         c0 = 0;
3101         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3102         case -2:
3103             /* 4 bytes UTF-8 */
3104             if (hold_index < hold_count){
3105                 c0 = hold_buf[hold_index++];
3106             } else if ((c0 = (*i_getc)(f)) == EOF) {
3107                 ret = EOF;
3108                 break;
3109             } else {
3110                 code_status(c0);
3111                 c0 <<= 8;
3112                 if (hold_index < hold_count){
3113                     c3 = hold_buf[hold_index++];
3114                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3115                     c0 = ret = EOF;
3116                     break;
3117                 } else {
3118                     code_status(c3);
3119                     (*iconv)(c2, c1, c0|c3);
3120                 }
3121             }
3122             break;
3123         case -1:
3124             /* 3 bytes EUC or UTF-8 */
3125             if (hold_index < hold_count){
3126                 c0 = hold_buf[hold_index++];
3127             } else if ((c0 = (*i_getc)(f)) == EOF) {
3128                 ret = EOF;
3129                 break;
3130             } else {
3131                 code_status(c0);
3132             }
3133             (*iconv)(c2, c1, c0);
3134             break;
3135         }
3136         if (c0 == EOF) break;
3137     }
3138     return ret;
3139 }
3140
3141 nkf_char push_hold_buf(nkf_char c2)
3142 {
3143     if (hold_count >= HOLD_SIZE*2)
3144         return (EOF);
3145     hold_buf[hold_count++] = (unsigned char)c2;
3146     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3147 }
3148
3149 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3150 {
3151 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3152     nkf_char val;
3153 #endif
3154     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3155 #ifdef SHIFTJIS_CP932
3156     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3157         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3158         if (val){
3159             c2 = val >> 8;
3160             c1 = val & 0xff;
3161         }
3162     }
3163     if (cp932inv_f
3164         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3165         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3166         if (c){
3167             c2 = c >> 8;
3168             c1 = c & 0xff;
3169         }
3170     }
3171 #endif /* SHIFTJIS_CP932 */
3172 #ifdef X0212_ENABLE
3173     if (!x0213_f && is_ibmext_in_sjis(c2)){
3174         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3175         if (val){
3176             if (val > 0x7FFF){
3177                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3178                 c1 = val & 0xff;
3179             }else{
3180                 c2 = val >> 8;
3181                 c1 = val & 0xff;
3182             }
3183             if (p2) *p2 = c2;
3184             if (p1) *p1 = c1;
3185             return 0;
3186         }
3187     }
3188 #endif
3189     if(c2 >= 0x80){
3190         if(x0213_f && c2 >= 0xF0){
3191             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3192                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3193             }else{ /* 78<=k<=94 */
3194                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3195                 if (0x9E < c1) c2++;
3196             }
3197         }else{
3198             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3199             if (0x9E < c1) c2++;
3200         }
3201         if (c1 < 0x9F)
3202             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3203         else {
3204             c1 = c1 - 0x7E;
3205         }
3206     }
3207
3208 #ifdef X0212_ENABLE
3209     c2 = x0212_unshift(c2);
3210 #endif
3211     if (p2) *p2 = c2;
3212     if (p1) *p1 = c1;
3213     return 0;
3214 }
3215
3216 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3217 {
3218     if (c2 == X0201) {
3219         c1 &= 0x7f;
3220     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3221         /* NOP */
3222     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3223         /* CP932 UDC */
3224         if(c1 == 0x7F) return 0;
3225         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3226         c2 = 0;
3227     } else {
3228         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3229         if (ret) return ret;
3230     }
3231     (*oconv)(c2, c1);
3232     return 0;
3233 }
3234
3235 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3236 {
3237     if (c2 == X0201) {
3238         c1 &= 0x7f;
3239 #ifdef X0212_ENABLE
3240     }else if (c2 == 0x8f){
3241         if (c0 == 0){
3242             return -1;
3243         }
3244         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3245             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3246             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3247             c2 = 0;
3248         } else {
3249             c2 = (c2 << 8) | (c1 & 0x7f);
3250             c1 = c0 & 0x7f;
3251 #ifdef SHIFTJIS_CP932
3252             if (cp51932_f){
3253                 nkf_char s2, s1;
3254                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3255                     s2e_conv(s2, s1, &c2, &c1);
3256                     if (c2 < 0x100){
3257                         c1 &= 0x7f;
3258                         c2 &= 0x7f;
3259                     }
3260                 }
3261             }
3262 #endif /* SHIFTJIS_CP932 */
3263         }
3264 #endif /* X0212_ENABLE */
3265     } else if (c2 == SSO){
3266         c2 = X0201;
3267         c1 &= 0x7f;
3268     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3269         /* NOP */
3270     } else {
3271         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3272             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3273             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3274             c2 = 0;
3275         } else {
3276             c1 &= 0x7f;
3277             c2 &= 0x7f;
3278 #ifdef SHIFTJIS_CP932
3279             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3280                 nkf_char s2, s1;
3281                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3282                     s2e_conv(s2, s1, &c2, &c1);
3283                     if (c2 < 0x100){
3284                         c1 &= 0x7f;
3285                         c2 &= 0x7f;
3286                     }
3287                 }
3288             }
3289 #endif /* SHIFTJIS_CP932 */
3290         }
3291     }
3292     (*oconv)(c2, c1);
3293     return 0;
3294 }
3295
3296 #ifdef UTF8_INPUT_ENABLE
3297 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3298 {
3299     nkf_char ret = 0;
3300
3301     if (!c1){
3302         *p2 = 0;
3303         *p1 = c2;
3304     }else if (0xc0 <= c2 && c2 <= 0xef) {
3305         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3306 #ifdef NUMCHAR_OPTION
3307         if (ret > 0){
3308             if (p2) *p2 = 0;
3309             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3310             ret = 0;
3311         }
3312 #endif
3313     }
3314     return ret;
3315 }
3316
3317 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3318 {
3319     nkf_char ret = 0;
3320     static const char w_iconv_utf8_1st_byte[] =
3321     { /* 0xC0 - 0xFF */
3322         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3323         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3324         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3325         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3326
3327     if (c2 < 0 || 0xff < c2) {
3328     }else if (c2 == 0) { /* 0 : 1 byte*/
3329         c0 = 0;
3330     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3331         return 0;
3332     } else{
3333         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3334         case 21:
3335             if (c1 < 0x80 || 0xBF < c1) return 0;
3336             break;
3337         case 30:
3338             if (c0 == 0) return -1;
3339             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3340                 return 0;
3341             break;
3342         case 31:
3343         case 33:
3344             if (c0 == 0) return -1;
3345             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3346                 return 0;
3347             break;
3348         case 32:
3349             if (c0 == 0) return -1;
3350             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3351                 return 0;
3352             break;
3353         case 40:
3354             if (c0 == 0) return -2;
3355             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3356                 return 0;
3357             break;
3358         case 41:
3359             if (c0 == 0) return -2;
3360             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3361                 return 0;
3362             break;
3363         case 42:
3364             if (c0 == 0) return -2;
3365             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3366                 return 0;
3367             break;
3368         default:
3369             return 0;
3370             break;
3371         }
3372     }
3373     if (c2 == 0 || c2 == EOF){
3374     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3375         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3376         c2 = 0;
3377     } else {
3378         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3379     }
3380     if (ret == 0){
3381         (*oconv)(c2, c1);
3382     }
3383     return ret;
3384 }
3385 #endif
3386
3387 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3388 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3389 {
3390     val &= VALUE_MASK;
3391     if (val < 0x80){
3392         *p2 = val;
3393         *p1 = 0;
3394         *p0 = 0;
3395     }else if (val < 0x800){
3396         *p2 = 0xc0 | (val >> 6);
3397         *p1 = 0x80 | (val & 0x3f);
3398         *p0 = 0;
3399     } else if (val <= NKF_INT32_C(0xFFFF)) {
3400         *p2 = 0xe0 | (val >> 12);
3401         *p1 = 0x80 | ((val >> 6) & 0x3f);
3402         *p0 = 0x80 | (val        & 0x3f);
3403     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3404         *p2 = 0xe0 |  (val >> 16);
3405         *p1 = 0x80 | ((val >> 12) & 0x3f);
3406         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3407     } else {
3408         *p2 = 0;
3409         *p1 = 0;
3410         *p0 = 0;
3411     }
3412 }
3413 #endif
3414
3415 #ifdef UTF8_INPUT_ENABLE
3416 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3417 {
3418     nkf_char val;
3419     if (c2 >= 0xf8) {
3420         val = -1;
3421     } else if (c2 >= 0xf0){
3422         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3423         val = (c2 & 0x0f) << 18;
3424         val |= (c1 & 0x3f) << 12;
3425         val |= (c0 & 0x3f00) >> 2;
3426         val |= (c0 & 0x3f);
3427     }else if (c2 >= 0xe0){
3428         val = (c2 & 0x0f) << 12;
3429         val |= (c1 & 0x3f) << 6;
3430         val |= (c0 & 0x3f);
3431     }else if (c2 >= 0xc0){
3432         val = (c2 & 0x1f) << 6;
3433         val |= (c1 & 0x3f);
3434     }else{
3435         val = c2;
3436     }
3437     return val;
3438 }
3439
3440 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3441 {
3442     nkf_char c2, c1, c0;
3443     nkf_char ret = 0;
3444     val &= VALUE_MASK;
3445     if (val < 0x80){
3446         *p2 = 0;
3447         *p1 = val;
3448     }else{
3449         w16w_conv(val, &c2, &c1, &c0);
3450         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3451 #ifdef NUMCHAR_OPTION
3452         if (ret > 0){
3453             *p2 = 0;
3454             *p1 = CLASS_UNICODE | val;
3455             ret = 0;
3456         }
3457 #endif
3458     }
3459     return ret;
3460 }
3461 #endif
3462
3463 #ifdef UTF8_INPUT_ENABLE
3464 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3465 {
3466     nkf_char ret = 0;
3467     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3468         (*oconv)(c2, c1);
3469         return 0;
3470     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3471         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3472             return -2;
3473         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3474         c2 = 0;
3475     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3476         /*
3477            return 2;
3478         */
3479         return 1;
3480     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3481     if (ret) return ret;
3482     (*oconv)(c2, c1);
3483     return 0;
3484 }
3485
3486 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3487 {
3488     int ret = 0;
3489
3490     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3491     } else if (is_unicode_bmp(c1)) {
3492         ret = w16e_conv(c1, &c2, &c1);
3493     } else {
3494         c2 = 0;
3495         c1 =  CLASS_UNICODE | c1;
3496     }
3497     if (ret) return ret;
3498     (*oconv)(c2, c1);
3499     return 0;
3500 }
3501
3502 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3503 {
3504     const unsigned short *const *pp;
3505     const unsigned short *const *const *ppp;
3506     static const char no_best_fit_chars_table_C2[] =
3507     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3508         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3509         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3510         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3511     static const char no_best_fit_chars_table_C2_ms[] =
3512     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3513         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3514         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3515         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3516     static const char no_best_fit_chars_table_932_C2[] =
3517     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3518         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3519         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3520         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3521     static const char no_best_fit_chars_table_932_C3[] =
3522     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3523         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3524         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3525         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3526     nkf_char ret = 0;
3527
3528     if(c2 < 0x80){
3529         *p2 = 0;
3530         *p1 = c2;
3531     }else if(c2 < 0xe0){
3532         if(no_best_fit_chars_f){
3533             if(ms_ucs_map_f == UCS_MAP_CP932){
3534                 switch(c2){
3535                 case 0xC2:
3536                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3537                     break;
3538                 case 0xC3:
3539                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3540                     break;
3541                 }
3542             }else if(!cp932inv_f){
3543                 switch(c2){
3544                 case 0xC2:
3545                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3546                     break;
3547                 case 0xC3:
3548                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3549                     break;
3550                 }
3551             }else if(ms_ucs_map_f == UCS_MAP_MS){
3552                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3553             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3554                 switch(c2){
3555                 case 0xC2:
3556                     switch(c1){
3557                     case 0xA2:
3558                     case 0xA3:
3559                     case 0xA5:
3560                     case 0xA6:
3561                     case 0xAC:
3562                     case 0xAF:
3563                     case 0xB8:
3564                         return 1;
3565                     }
3566                     break;
3567                 }
3568             }
3569         }
3570         pp =
3571             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3572             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3573             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3574             utf8_to_euc_2bytes;
3575         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3576     }else if(c0 < 0xF0){
3577         if(no_best_fit_chars_f){
3578             if(ms_ucs_map_f == UCS_MAP_CP932){
3579                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3580             }else if(ms_ucs_map_f == UCS_MAP_MS){
3581                 switch(c2){
3582                 case 0xE2:
3583                     switch(c1){
3584                     case 0x80:
3585                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3586                         break;
3587                     case 0x88:
3588                         if(c0 == 0x92) return 1;
3589                         break;
3590                     }
3591                     break;
3592                 case 0xE3:
3593                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3594                     break;
3595                 }
3596             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3597                 switch(c2){
3598                 case 0xE3:
3599                     switch(c1){
3600                     case 0x82:
3601                             if(c0 == 0x94) return 1;
3602                         break;
3603                     case 0x83:
3604                             if(c0 == 0xBB) return 1;
3605                         break;
3606                     }
3607                     break;
3608                 }
3609             }else{
3610                 switch(c2){
3611                 case 0xE2:
3612                     switch(c1){
3613                     case 0x80:
3614                         if(c0 == 0x95) return 1;
3615                         break;
3616                     case 0x88:
3617                         if(c0 == 0xA5) return 1;
3618                         break;
3619                     }
3620                     break;
3621                 case 0xEF:
3622                     switch(c1){
3623                     case 0xBC:
3624                         if(c0 == 0x8D) return 1;
3625                         break;
3626                     case 0xBD:
3627                         if(c0 == 0x9E && !cp932inv_f) return 1;
3628                         break;
3629                     case 0xBF:
3630                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3631                         break;
3632                     }
3633                     break;
3634                 }
3635             }
3636         }
3637         ppp =
3638             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3639             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3640             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3641             utf8_to_euc_3bytes;
3642         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3643     }else return -1;
3644 #ifdef SHIFTJIS_CP932
3645     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3646         nkf_char s2, s1;
3647         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3648             s2e_conv(s2, s1, p2, p1);
3649         }else{
3650             ret = 1;
3651         }
3652     }
3653 #endif
3654     return ret;
3655 }
3656
3657 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3658 {
3659     nkf_char c2;
3660     const unsigned short *p;
3661     unsigned short val;
3662
3663     if (pp == 0) return 1;
3664
3665     c1 -= 0x80;
3666     if (c1 < 0 || psize <= c1) return 1;
3667     p = pp[c1];
3668     if (p == 0)  return 1;
3669
3670     c0 -= 0x80;
3671     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3672     val = p[c0];
3673     if (val == 0) return 1;
3674     if (no_cp932ext_f && (
3675         (val>>8) == 0x2D || /* NEC special characters */
3676         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3677         )) return 1;
3678
3679     c2 = val >> 8;
3680    if (val > 0x7FFF){
3681         c2 &= 0x7f;
3682         c2 |= PREFIX_EUCG3;
3683     }
3684     if (c2 == SO) c2 = X0201;
3685     c1 = val & 0x7f;
3686     if (p2) *p2 = c2;
3687     if (p1) *p1 = c1;
3688     return 0;
3689 }
3690
3691 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3692 {
3693     int shift = 20;
3694     c &= VALUE_MASK;
3695     while(shift >= 0){
3696         if(c >= 1<<shift){
3697             while(shift >= 0){
3698                 (*f)(0, bin2hex(c>>shift));
3699                 shift -= 4;
3700             }
3701         }else{
3702             shift -= 4;
3703         }
3704     }
3705     return;
3706 }
3707
3708 void encode_fallback_html(nkf_char c)
3709 {
3710     (*oconv)(0, '&');
3711     (*oconv)(0, '#');
3712     c &= VALUE_MASK;
3713     if(c >= NKF_INT32_C(1000000))
3714         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3715     if(c >= NKF_INT32_C(100000))
3716         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3717     if(c >= 10000)
3718         (*oconv)(0, 0x30+(c/10000  )%10);
3719     if(c >= 1000)
3720         (*oconv)(0, 0x30+(c/1000   )%10);
3721     if(c >= 100)
3722         (*oconv)(0, 0x30+(c/100    )%10);
3723     if(c >= 10)
3724         (*oconv)(0, 0x30+(c/10     )%10);
3725     if(c >= 0)
3726         (*oconv)(0, 0x30+ c         %10);
3727     (*oconv)(0, ';');
3728     return;
3729 }
3730
3731 void encode_fallback_xml(nkf_char c)
3732 {
3733     (*oconv)(0, '&');
3734     (*oconv)(0, '#');
3735     (*oconv)(0, 'x');
3736     nkf_each_char_to_hex(oconv, c);
3737     (*oconv)(0, ';');
3738     return;
3739 }
3740
3741 void encode_fallback_java(nkf_char c)
3742 {
3743     (*oconv)(0, '\\');
3744     c &= VALUE_MASK;
3745     if(!is_unicode_bmp(c)){
3746         (*oconv)(0, 'U');
3747         (*oconv)(0, '0');
3748         (*oconv)(0, '0');
3749         (*oconv)(0, bin2hex(c>>20));
3750         (*oconv)(0, bin2hex(c>>16));
3751     }else{
3752         (*oconv)(0, 'u');
3753     }
3754     (*oconv)(0, bin2hex(c>>12));
3755     (*oconv)(0, bin2hex(c>> 8));
3756     (*oconv)(0, bin2hex(c>> 4));
3757     (*oconv)(0, bin2hex(c    ));
3758     return;
3759 }
3760
3761 void encode_fallback_perl(nkf_char c)
3762 {
3763     (*oconv)(0, '\\');
3764     (*oconv)(0, 'x');
3765     (*oconv)(0, '{');
3766     nkf_each_char_to_hex(oconv, c);
3767     (*oconv)(0, '}');
3768     return;
3769 }
3770
3771 void encode_fallback_subchar(nkf_char c)
3772 {
3773     c = unicode_subchar;
3774     (*oconv)((c>>8)&0xFF, c&0xFF);
3775     return;
3776 }
3777 #endif
3778
3779 #ifdef UTF8_OUTPUT_ENABLE
3780 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3781 {
3782     const unsigned short *p;
3783
3784     if (c2 == X0201) {
3785         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3786             switch (c1) {
3787             case 0x20:
3788                 return 0xA0;
3789             case 0x7D:
3790                 return 0xA9;
3791             }
3792         }
3793         p = euc_to_utf8_1byte;
3794 #ifdef X0212_ENABLE
3795     } else if (is_eucg3(c2)){
3796         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3797             return 0xA6;
3798         }
3799         c2 = (c2&0x7f) - 0x21;
3800         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3801             p = x0212_to_utf8_2bytes[c2];
3802         else
3803             return 0;
3804 #endif
3805     } else {
3806         c2 &= 0x7f;
3807         c2 = (c2&0x7f) - 0x21;
3808         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3809             p =
3810                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3811                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3812                 euc_to_utf8_2bytes_ms[c2];
3813         else
3814             return 0;
3815     }
3816     if (!p) return 0;
3817     c1 = (c1 & 0x7f) - 0x21;
3818     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3819         return p[c1];
3820     return 0;
3821 }
3822
3823 void w_oconv(nkf_char c2, nkf_char c1)
3824 {
3825     nkf_char c0;
3826     nkf_char val;
3827
3828     if (output_bom_f) {
3829         output_bom_f = FALSE;
3830         (*o_putc)('\357');
3831         (*o_putc)('\273');
3832         (*o_putc)('\277');
3833     }
3834
3835     if (c2 == EOF) {
3836         (*o_putc)(EOF);
3837         return;
3838     }
3839
3840 #ifdef NUMCHAR_OPTION
3841     if (c2 == 0 && is_unicode_capsule(c1)){
3842         val = c1 & VALUE_MASK;
3843         if (val < 0x80){
3844             (*o_putc)(val);
3845         }else if (val < 0x800){
3846             (*o_putc)(0xC0 | (val >> 6));
3847             (*o_putc)(0x80 | (val & 0x3f));
3848         } else if (val <= NKF_INT32_C(0xFFFF)) {
3849             (*o_putc)(0xE0 | (val >> 12));
3850             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3851             (*o_putc)(0x80 | (val        & 0x3f));
3852         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3853             (*o_putc)(0xF0 | ( val>>18));
3854             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3855             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3856             (*o_putc)(0x80 | ( val      & 0x3f));
3857         }
3858         return;
3859     }
3860 #endif
3861
3862     if (c2 == 0) {
3863         output_mode = ASCII;
3864         (*o_putc)(c1);
3865     } else if (c2 == ISO8859_1) {
3866         output_mode = ISO8859_1;
3867         (*o_putc)(c1 | 0x080);
3868     } else {
3869         output_mode = UTF8;
3870         val = e2w_conv(c2, c1);
3871         if (val){
3872             w16w_conv(val, &c2, &c1, &c0);
3873             (*o_putc)(c2);
3874             if (c1){
3875                 (*o_putc)(c1);
3876                 if (c0) (*o_putc)(c0);
3877             }
3878         }
3879     }
3880 }
3881
3882 void w_oconv16(nkf_char c2, nkf_char c1)
3883 {
3884     if (output_bom_f) {
3885         output_bom_f = FALSE;
3886         if (output_endian == ENDIAN_LITTLE){
3887             (*o_putc)((unsigned char)'\377');
3888             (*o_putc)('\376');
3889         }else{
3890             (*o_putc)('\376');
3891             (*o_putc)((unsigned char)'\377');
3892         }
3893     }
3894
3895     if (c2 == EOF) {
3896         (*o_putc)(EOF);
3897         return;
3898     }
3899
3900     if (c2 == ISO8859_1) {
3901         c2 = 0;
3902         c1 |= 0x80;
3903 #ifdef NUMCHAR_OPTION
3904     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3905         if (is_unicode_bmp(c1)) {
3906             c2 = (c1 >> 8) & 0xff;
3907             c1 &= 0xff;
3908         } else {
3909             c1 &= VALUE_MASK;
3910             if (c1 <= UNICODE_MAX) {
3911                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3912                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3913                 if (output_endian == ENDIAN_LITTLE){
3914                     (*o_putc)(c2 & 0xff);
3915                     (*o_putc)((c2 >> 8) & 0xff);
3916                     (*o_putc)(c1 & 0xff);
3917                     (*o_putc)((c1 >> 8) & 0xff);
3918                 }else{
3919                     (*o_putc)((c2 >> 8) & 0xff);
3920                     (*o_putc)(c2 & 0xff);
3921                     (*o_putc)((c1 >> 8) & 0xff);
3922                     (*o_putc)(c1 & 0xff);
3923                 }
3924             }
3925             return;
3926         }
3927 #endif
3928     } else if (c2) {
3929         nkf_char val = e2w_conv(c2, c1);
3930         c2 = (val >> 8) & 0xff;
3931         c1 = val & 0xff;
3932         if (!val) return;
3933     }
3934     if (output_endian == ENDIAN_LITTLE){
3935         (*o_putc)(c1);
3936         (*o_putc)(c2);
3937     }else{
3938         (*o_putc)(c2);
3939         (*o_putc)(c1);
3940     }
3941 }
3942
3943 void w_oconv32(nkf_char c2, nkf_char c1)
3944 {
3945     if (output_bom_f) {
3946         output_bom_f = FALSE;
3947         if (output_endian == ENDIAN_LITTLE){
3948             (*o_putc)((unsigned char)'\377');
3949             (*o_putc)('\376');
3950             (*o_putc)('\000');
3951             (*o_putc)('\000');
3952         }else{
3953             (*o_putc)('\000');
3954             (*o_putc)('\000');
3955             (*o_putc)('\376');
3956             (*o_putc)((unsigned char)'\377');
3957         }
3958     }
3959
3960     if (c2 == EOF) {
3961         (*o_putc)(EOF);
3962         return;
3963     }
3964
3965     if (c2 == ISO8859_1) {
3966         c1 |= 0x80;
3967 #ifdef NUMCHAR_OPTION
3968     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3969         c1 &= VALUE_MASK;
3970 #endif
3971     } else if (c2) {
3972         c1 = e2w_conv(c2, c1);
3973         if (!c1) return;
3974     }
3975     if (output_endian == ENDIAN_LITTLE){
3976         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3977         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3978         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3979         (*o_putc)('\000');
3980     }else{
3981         (*o_putc)('\000');
3982         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3983         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3984         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3985     }
3986 }
3987 #endif
3988
3989 void e_oconv(nkf_char c2, nkf_char c1)
3990 {
3991 #ifdef NUMCHAR_OPTION
3992     if (c2 == 0 && is_unicode_capsule(c1)){
3993         w16e_conv(c1, &c2, &c1);
3994         if (c2 == 0 && is_unicode_capsule(c1)){
3995             c2 = c1 & VALUE_MASK;
3996             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
3997                 /* eucJP-ms UDC */
3998                 c1 &= 0xFFF;
3999                 c2 = c1 / 94;
4000                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4001                 c1 = 0x21 + c1 % 94;
4002                 if (is_eucg3(c2)){
4003                     (*o_putc)(0x8f);
4004                     (*o_putc)((c2 & 0x7f) | 0x080);
4005                     (*o_putc)(c1 | 0x080);
4006                 }else{
4007                     (*o_putc)((c2 & 0x7f) | 0x080);
4008                     (*o_putc)(c1 | 0x080);
4009                 }
4010                 return;
4011             } else {
4012                 if (encode_fallback) (*encode_fallback)(c1);
4013                 return;
4014             }
4015         }
4016     }
4017 #endif
4018     if (c2 == EOF) {
4019         (*o_putc)(EOF);
4020         return;
4021     } else if (c2 == 0) {
4022         output_mode = ASCII;
4023         (*o_putc)(c1);
4024     } else if (c2 == X0201) {
4025         output_mode = JAPANESE_EUC;
4026         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4027     } else if (c2 == ISO8859_1) {
4028         output_mode = ISO8859_1;
4029         (*o_putc)(c1 | 0x080);
4030 #ifdef X0212_ENABLE
4031     } else if (is_eucg3(c2)){
4032         output_mode = JAPANESE_EUC;
4033 #ifdef SHIFTJIS_CP932
4034         if (!cp932inv_f){
4035             nkf_char s2, s1;
4036             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4037                 s2e_conv(s2, s1, &c2, &c1);
4038             }
4039         }
4040 #endif
4041         if (c2 == 0) {
4042             output_mode = ASCII;
4043             (*o_putc)(c1);
4044         }else if (is_eucg3(c2)){
4045             if (x0212_f){
4046                 (*o_putc)(0x8f);
4047                 (*o_putc)((c2 & 0x7f) | 0x080);
4048                 (*o_putc)(c1 | 0x080);
4049             }
4050         }else{
4051             (*o_putc)((c2 & 0x7f) | 0x080);
4052             (*o_putc)(c1 | 0x080);
4053         }
4054 #endif
4055     } else {
4056         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4057             set_iconv(FALSE, 0);
4058             return; /* too late to rescue this char */
4059         }
4060         output_mode = JAPANESE_EUC;
4061         (*o_putc)(c2 | 0x080);
4062         (*o_putc)(c1 | 0x080);
4063     }
4064 }
4065
4066 #ifdef X0212_ENABLE
4067 nkf_char x0212_shift(nkf_char c)
4068 {
4069     nkf_char ret = c;
4070     c &= 0x7f;
4071     if (is_eucg3(ret)){
4072         if (0x75 <= c && c <= 0x7f){
4073             ret = c + (0x109 - 0x75);
4074         }
4075     }else{
4076         if (0x75 <= c && c <= 0x7f){
4077             ret = c + (0x113 - 0x75);
4078         }
4079     }
4080     return ret;
4081 }
4082
4083
4084 nkf_char x0212_unshift(nkf_char c)
4085 {
4086     nkf_char ret = c;
4087     if (0x7f <= c && c <= 0x88){
4088         ret = c + (0x75 - 0x7f);
4089     }else if (0x89 <= c && c <= 0x92){
4090         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4091     }
4092     return ret;
4093 }
4094 #endif /* X0212_ENABLE */
4095
4096 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4097 {
4098     nkf_char ndx;
4099     if (is_eucg3(c2)){
4100         ndx = c2 & 0x7f;
4101         if (x0213_f){
4102             if((0x21 <= ndx && ndx <= 0x2F)){
4103                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4104                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4105                 return 0;
4106             }else if(0x6E <= ndx && ndx <= 0x7E){
4107                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4108                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4109                 return 0;
4110             }
4111             return 1;
4112         }
4113 #ifdef X0212_ENABLE
4114         else if(nkf_isgraph(ndx)){
4115             nkf_char val = 0;
4116             const unsigned short *ptr;
4117             ptr = x0212_shiftjis[ndx - 0x21];
4118             if (ptr){
4119                 val = ptr[(c1 & 0x7f) - 0x21];
4120             }
4121             if (val){
4122                 c2 = val >> 8;
4123                 c1 = val & 0xff;
4124                 if (p2) *p2 = c2;
4125                 if (p1) *p1 = c1;
4126                 return 0;
4127             }
4128             c2 = x0212_shift(c2);
4129         }
4130 #endif /* X0212_ENABLE */
4131     }
4132     if(0x7F < c2) return 1;
4133     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4134     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4135     return 0;
4136 }
4137
4138 void s_oconv(nkf_char c2, nkf_char c1)
4139 {
4140 #ifdef NUMCHAR_OPTION
4141     if (c2 == 0 && is_unicode_capsule(c1)){
4142         w16e_conv(c1, &c2, &c1);
4143         if (c2 == 0 && is_unicode_capsule(c1)){
4144             c2 = c1 & VALUE_MASK;
4145             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4146                 /* CP932 UDC */
4147                 c1 &= 0xFFF;
4148                 c2 = c1 / 188 + 0xF0;
4149                 c1 = c1 % 188;
4150                 c1 += 0x40 + (c1 > 0x3e);
4151                 (*o_putc)(c2);
4152                 (*o_putc)(c1);
4153                 return;
4154             } else {
4155                 if(encode_fallback)(*encode_fallback)(c1);
4156                 return;
4157             }
4158         }
4159     }
4160 #endif
4161     if (c2 == EOF) {
4162         (*o_putc)(EOF);
4163         return;
4164     } else if (c2 == 0) {
4165         output_mode = ASCII;
4166         (*o_putc)(c1);
4167     } else if (c2 == X0201) {
4168         output_mode = SHIFT_JIS;
4169         (*o_putc)(c1|0x80);
4170     } else if (c2 == ISO8859_1) {
4171         output_mode = ISO8859_1;
4172         (*o_putc)(c1 | 0x080);
4173 #ifdef X0212_ENABLE
4174     } else if (is_eucg3(c2)){
4175         output_mode = SHIFT_JIS;
4176         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4177             (*o_putc)(c2);
4178             (*o_putc)(c1);
4179         }
4180 #endif
4181     } else {
4182         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4183             set_iconv(FALSE, 0);
4184             return; /* too late to rescue this char */
4185         }
4186         output_mode = SHIFT_JIS;
4187         e2s_conv(c2, c1, &c2, &c1);
4188
4189 #ifdef SHIFTJIS_CP932
4190         if (cp932inv_f
4191             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4192             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4193             if (c){
4194                 c2 = c >> 8;
4195                 c1 = c & 0xff;
4196             }
4197         }
4198 #endif /* SHIFTJIS_CP932 */
4199
4200         (*o_putc)(c2);