OSDN Git Service

* Add tests for Next line convertion.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.138 2007/10/01 19:55:25 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-10-01"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
82 void  djgpp_setbinmode(FILE *fp)
83 {
84     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
88     __file_handle_set(fd, m);
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
107 #ifdef OVERWRITE
108 /* added by satoru@isoternet.org */
109 #if defined(__EMX__)
110 #include <sys/types.h>
111 #endif
112 #include <sys/stat.h>
113 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
114 #include <unistd.h>
115 #if defined(__WATCOMC__)
116 #include <sys/utime.h>
117 #else
118 #include <utime.h>
119 #endif
120 #else /* defined(MSDOS) */
121 #ifdef __WIN32__
122 #ifdef __BORLANDC__ /* BCC32 */
123 #include <utime.h>
124 #else /* !defined(__BORLANDC__) */
125 #include <sys/utime.h>
126 #endif /* (__BORLANDC__) */
127 #else /* !defined(__WIN32__) */
128 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
129 #include <sys/utime.h>
130 #elif defined(__TURBOC__) /* BCC */
131 #include <utime.h>
132 #elif defined(LSI_C) /* LSI C */
133 #endif /* (__WIN32__) */
134 #endif
135 #endif
136 #endif
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
190 /* ASCII CODE */
191
#define         BS      0x08
#define         TAB     0x09
#define         LF      0x0a
#define         CR      0x0d
#define         ESC     0x1b
#define         SP      0x20
#define         AT      0x40
#define         SSP     0xa0
#define         DEL     0x7f
#define         SI      0x0f
#define         SO      0x0e
#define         SSO     0x8e
#define         SS3     0x8f
#define         CRLF    0x0D0A

/*
 * Character classification / conversion helpers.
 *
 * Every use of a macro parameter is parenthesized so that an expression
 * argument (e.g. `c & 0x7f`, `a | b`, `cond ? x : y`) expands with the
 * intended precedence.  The argument is still evaluated more than once,
 * so never pass an expression with side effects (such as `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<= (c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* hex digit -> value 0..15; any non-hex character yields 0 */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* low nibble of c -> upper-case hex digit character */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* true when the upper byte of the low 16 bits of c2 is SS3 (0x8f) */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
226
227 #define CP932_TABLE_BEGIN 0xFA
228 #define CP932_TABLE_END   0xFC
229 #define CP932INV_TABLE_BEGIN 0xED
230 #define CP932INV_TABLE_END   0xEE
231 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
232
233 #define         HOLD_SIZE       1024
234 #if defined(INT_IS_SHORT)
235 #define         IOBUF_SIZE      2048
236 #else
237 #define         IOBUF_SIZE      16384
238 #endif
239
240 #define         DEFAULT_J       'B'
241 #define         DEFAULT_R       'B'
242
243 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
244 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
245
246 #define         RANGE_NUM_MAX   18
247 #define         GETA1   0x22
248 #define         GETA2   0x2e
249
250
251 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
252 #define sizeof_euc_to_utf8_1byte 94
253 #define sizeof_euc_to_utf8_2bytes 94
254 #define sizeof_utf8_to_euc_C2 64
255 #define sizeof_utf8_to_euc_E5B8 64
256 #define sizeof_utf8_to_euc_2bytes 112
257 #define sizeof_utf8_to_euc_3bytes 16
258 #endif
259
260 /* MIME preprocessor */
261
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
264 #endif
265
266 struct input_code{
267     char *name;
268     nkf_char stat;
269     nkf_char score;
270     nkf_char index;
271     nkf_char buf[3];
272     void (*status_func)(struct input_code *, nkf_char);
273     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
274     int _file_stat;
275 };
276
277 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
278
279 #ifndef PERL_XS
280 static const char *CopyRight = COPY_RIGHT;
281 #endif
282 #if !defined(PERL_XS) && !defined(WIN32DLL)
283 static  nkf_char     noconvert(FILE *f);
284 #endif
285 static  void    module_connection(void);
286 static  nkf_char     kanji_convert(FILE *f);
287 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
288 static  nkf_char     push_hold_buf(nkf_char c2);
289 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
290 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
291 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
292 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
293 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
294 /* UCS Mapping
295  * 0: Shift_JIS, eucJP-ascii
296  * 1: eucJP-ms
297  * 2: CP932, CP51932
298  * 3: CP10001
299  */
300 #define UCS_MAP_ASCII   0
301 #define UCS_MAP_MS      1
302 #define UCS_MAP_CP932   2
303 #define UCS_MAP_CP10001 3
304 static int ms_ucs_map_f = UCS_MAP_ASCII;
305 #endif
306 #ifdef UTF8_INPUT_ENABLE
307 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
308 static  int     no_cp932ext_f = FALSE;
309 /* ignore ZERO WIDTH NO-BREAK SPACE */
310 static  int     no_best_fit_chars_f = FALSE;
311 static  int     input_endian = ENDIAN_BIG;
312 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
313 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
314 static  void    encode_fallback_html(nkf_char c);
315 static  void    encode_fallback_xml(nkf_char c);
316 static  void    encode_fallback_java(nkf_char c);
317 static  void    encode_fallback_perl(nkf_char c);
318 static  void    encode_fallback_subchar(nkf_char c);
319 static  void    (*encode_fallback)(nkf_char c) = NULL;
320 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
321 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
322 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
323 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
324 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
325 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
326 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
327 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
328 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
329 static  void    w_status(struct input_code *, nkf_char);
330 #endif
331 #ifdef UTF8_OUTPUT_ENABLE
332 static  int     output_bom_f = FALSE;
333 static  int     output_endian = ENDIAN_BIG;
334 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
335 static  void    w_oconv(nkf_char c2,nkf_char c1);
336 static  void    w_oconv16(nkf_char c2,nkf_char c1);
337 static  void    w_oconv32(nkf_char c2,nkf_char c1);
338 #endif
339 static  void    e_oconv(nkf_char c2,nkf_char c1);
340 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  void    s_oconv(nkf_char c2,nkf_char c1);
342 static  void    j_oconv(nkf_char c2,nkf_char c1);
343 static  void    fold_conv(nkf_char c2,nkf_char c1);
344 static  void    nl_conv(nkf_char c2,nkf_char c1);
345 static  void    z_conv(nkf_char c2,nkf_char c1);
346 static  void    rot_conv(nkf_char c2,nkf_char c1);
347 static  void    hira_conv(nkf_char c2,nkf_char c1);
348 static  void    base64_conv(nkf_char c2,nkf_char c1);
349 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
350 static  void    no_connection(nkf_char c2,nkf_char c1);
351 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
352
353 static  void    code_score(struct input_code *ptr);
354 static  void    code_status(nkf_char c);
355
356 static  void    std_putc(nkf_char c);
357 static  nkf_char     std_getc(FILE *f);
358 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
359
360 static  nkf_char     broken_getc(FILE *f);
361 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     mime_begin(FILE *f);
364 static  nkf_char     mime_getc(FILE *f);
365 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
366
367 static  void    switch_mime_getc(void);
368 static  void    unswitch_mime_getc(void);
369 static  nkf_char     mime_begin_strict(FILE *f);
370 static  nkf_char     mime_getc_buf(FILE *f);
371 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
372 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
373
374 static  nkf_char     base64decode(nkf_char c);
375 static  void    mime_prechar(nkf_char c2, nkf_char c1);
376 static  void    mime_putc(nkf_char c);
377 static  void    open_mime(nkf_char c);
378 static  void    close_mime(void);
379 static  void    eof_mime(void);
380 static  void    mimeout_addchar(nkf_char c);
381 #ifndef PERL_XS
382 static  void    usage(void);
383 static  void    version(void);
384 #endif
385 static  void    options(unsigned char *c);
386 #if defined(PERL_XS) || defined(WIN32DLL)
387 static  void    reinit(void);
388 #endif
389
390 /* buffers */
391
392 #if !defined(PERL_XS) && !defined(WIN32DLL)
393 static unsigned char   stdibuf[IOBUF_SIZE];
394 static unsigned char   stdobuf[IOBUF_SIZE];
395 #endif
396 static unsigned char   hold_buf[HOLD_SIZE*2];
397 static int             hold_count = 0;
398
399 /* MIME preprocessor fifo */
400
401 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
402 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
403 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
404 static unsigned char           mime_buf[MIME_BUF_SIZE];
405 static unsigned int            mime_top = 0;
406 static unsigned int            mime_last = 0;  /* decoded */
407 static unsigned int            mime_input = 0; /* undecoded */
408 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
409
410 /* flags */
411 static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
412 static int             estab_f = FALSE;
413 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
414 static int             binmode_f = TRUE;       /* binary mode */
415 static int             rot_f = FALSE;          /* rot14/43 mode (sic; NOTE(review): presumably ROT13/47 - confirm against rot_conv) */
416 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
417 static int             input_f = FALSE;        /* non fixed input code  */
418 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
419 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
420 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
421 static int             mimebuf_f = FALSE;      /* MIME buffered input */
422 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
423 static int             iso8859_f = FALSE;      /* ISO8859 through */
424 static int             mimeout_f = FALSE;       /* base64 mode */
425 #if defined(MSDOS) || defined(__OS2__)
426 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
427 #else
428 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
429 #endif
430 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
431
432 #ifdef UNICODE_NORMALIZATION
433 static int nfc_f = FALSE;
434 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
435 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
436 static nkf_char nfc_getc(FILE *f);
437 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
438 #endif
439
440 #ifdef INPUT_OPTION
441 static int cap_f = FALSE;
442 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
443 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
444 static nkf_char cap_getc(FILE *f);
445 static nkf_char cap_ungetc(nkf_char c,FILE *f);
446
447 static int url_f = FALSE;
448 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
449 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
450 static nkf_char url_getc(FILE *f);
451 static nkf_char url_ungetc(nkf_char c,FILE *f);
452 #endif
453
454 #if defined(INT_IS_SHORT)
455 #define NKF_INT32_C(n)   (n##L)
456 #else
457 #define NKF_INT32_C(n)   (n)
458 #endif
459 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
460 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
461 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
462 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
463 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
464 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
465 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
466
467 #ifdef NUMCHAR_OPTION
468 static int numchar_f = FALSE;
469 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc (numchar_getc) */
470 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
471 static nkf_char numchar_getc(FILE *f);
472 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
473 #endif
474
475 #ifdef CHECK_OPTION
476 static int noout_f = FALSE;
477 static void no_putc(nkf_char c);
478 static nkf_char debug_f = FALSE;
479 static void debug(const char *str);
480 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
481 #endif
482
483 static int guess_f = FALSE;
484 #if !defined PERL_XS
485 static  void    print_guessed_code(char *filename);
486 #endif
487 static  void    set_input_codename(char *codename);
488 static int is_inputcode_mixed = FALSE;
489
490 #ifdef EXEC_IO
491 static int exec_f = 0;
492 #endif
493
494 #ifdef SHIFTJIS_CP932
495 /* invert IBM extended characters to others */
496 static int cp51932_f = FALSE;
497
498 /* invert NEC-selected IBM extended characters to IBM extended characters */
499 static int cp932inv_f = TRUE;
500
501 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
502 #endif /* SHIFTJIS_CP932 */
503
504 #ifdef X0212_ENABLE
505 static int x0212_f = FALSE;
506 static nkf_char x0212_shift(nkf_char c);
507 static nkf_char x0212_unshift(nkf_char c);
508 #endif
509 static int x0213_f = FALSE;
510
511 static unsigned char prefix_table[256];
512
513 static void set_code_score(struct input_code *ptr, nkf_char score);
514 static void clr_code_score(struct input_code *ptr, nkf_char score);
515 static void status_disable(struct input_code *ptr);
516 static void status_push_ch(struct input_code *ptr, nkf_char c);
517 static void status_clear(struct input_code *ptr);
518 static void status_reset(struct input_code *ptr);
519 static void status_reinit(struct input_code *ptr);
520 static void status_check(struct input_code *ptr, nkf_char c);
521 static void e_status(struct input_code *, nkf_char);
522 static void s_status(struct input_code *, nkf_char);
523
524 struct input_code input_code_list[] = { /* fields: name, stat, score, index, buf, status_func, iconv_func, _file_stat */
525     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
526     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
527 #ifdef UTF8_INPUT_ENABLE
528     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
529     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0}, /* no status_func */
530     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0}, /* no status_func */
531 #endif
532     {0} /* terminator: all-zero entry (name is a null pointer) */
533 };
534
535 static int              mimeout_mode = 0;
536 static int              base64_count = 0;
537
538 /* X0208 -> ASCII converter */
539
540 /* fold parameter */
541 static int             f_line = 0;    /* chars in line */
542 static int             f_prev = 0;
543 static int             fold_preserve_f = FALSE; /* preserve new lines */
544 static int             fold_f  = FALSE;
545 static int             fold_len  = 0;
546
547 /* options */
548 static unsigned char   kanji_intro = DEFAULT_J;
549 static unsigned char   ascii_intro = DEFAULT_R;
550
551 /* Folding */
552
553 #define FOLD_MARGIN  10
554 #define DEFAULT_FOLD 60
555
556 static int             fold_margin  = FOLD_MARGIN;
557
558 /* converters */
559
560 #ifdef DEFAULT_CODE_JIS
561 #   define  DEFAULT_CONV j_oconv
562 #endif
563 #ifdef DEFAULT_CODE_SJIS
564 #   define  DEFAULT_CONV s_oconv
565 #endif
566 #ifdef DEFAULT_CODE_EUC
567 #   define  DEFAULT_CONV e_oconv
568 #endif
569 #ifdef DEFAULT_CODE_UTF8
570 #   define  DEFAULT_CONV w_oconv
571 #endif
572
573 /* process default */
574 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
575
576 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
577 /* s_iconv or oconv */
578 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
579
580 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
587
588 /* static redirections */
589
590 static  void   (*o_putc)(nkf_char c) = std_putc; /* general output */
591
592 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
593 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
594
595 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc (presumably broken_getc - confirm) */
596 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
597
598 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
599
600 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
601 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
602
603 /* for strict mime */
604 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
605 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
606
607 /* Global states */
608 static int output_mode = ASCII,    /* output kanji mode */
609            input_mode =  ASCII,    /* input kanji mode */
610            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
611 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
612
613 /* X0201 / X0208 conversion tables */
614
615 /* X0201 kana conversion table */
616 /* 90-9F A0-DF */
617 static const unsigned char cv[]= {
618     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
619     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
620     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
621     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
622     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
623     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
624     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
625     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
626     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
627     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
628     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
629     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
630     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
631     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
632     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
633     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
634     0x00,0x00};
635
636
637 /* X0201 kana conversion table for dakuten (voiced sound mark) */
638 /* 90-9F A0-DF */
639 static const unsigned char dv[]= {
640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
645     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
646     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
647     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
648     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
649     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
650     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
651     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656     0x00,0x00};
657
658 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
659 /* 90-9F A0-DF */
660 static const unsigned char ev[]= {
661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
672     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677     0x00,0x00};
678
679
680 /* X0208 kigou conversion table */
681 /* 0x8140 - 0x819e */
682 static const unsigned char fv[] = {
683
684     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
685     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
686     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
687     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
688     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
689     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
690     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
691     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
692     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
696 } ;
697
698
699
700 static int             file_out_f = FALSE;
701 #ifdef OVERWRITE
702 static int             overwrite_f = FALSE;
703 static int             preserve_time_f = FALSE;
704 static int             backup_f = FALSE;
705 static char            *backup_suffix = "";
706 static char *get_backup_filename(const char *suffix, const char *filename);
707 #endif
708
709 static int nlmode_f = 0;   /* CR, LF, CRLF */
710 static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
711 static nkf_char prev_cr = 0; /* CR or 0 */
712 #ifdef EASYWIN /*Easy Win */
713 static int             end_check;
714 #endif /*Easy Win */
715
716 #define STD_GC_BUFSIZE (256)
717 nkf_char std_gc_buf[STD_GC_BUFSIZE];
718 nkf_char std_gc_ndx;
719
720 #ifdef WIN32DLL
721 #include "nkf32dll.c"
722 #elif defined(PERL_XS)
723 #else /* WIN32DLL */
724 int main(int argc, char **argv)
725 {
726     FILE  *fin;
727     unsigned char  *cp;
728
729     char *outfname = NULL;
730     char *origfname;
731
732 #ifdef EASYWIN /*Easy Win */
733     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
734 #endif
735
736     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
737         cp = (unsigned char *)*argv;
738         options(cp);
739 #ifdef EXEC_IO
740         if (exec_f){
741             int fds[2], pid;
742             if (pipe(fds) < 0 || (pid = fork()) < 0){
743                 abort();
744             }
745             if (pid == 0){
746                 if (exec_f > 0){
747                     close(fds[0]);
748                     dup2(fds[1], 1);
749                 }else{
750                     close(fds[1]);
751                     dup2(fds[0], 0);
752                 }
753                 execvp(argv[1], &argv[1]);
754             }
755             if (exec_f > 0){
756                 close(fds[1]);
757                 dup2(fds[0], 0);
758             }else{
759                 close(fds[0]);
760                 dup2(fds[1], 1);
761             }
762             argc = 0;
763             break;
764         }
765 #endif
766     }
767     if(x0201_f == WISH_TRUE)
768          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
769
770     if (binmode_f == TRUE)
771 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
772     if (freopen("","wb",stdout) == NULL)
773         return (-1);
774 #else
775     setbinmode(stdout);
776 #endif
777
778     if (unbuf_f)
779       setbuf(stdout, (char *) NULL);
780     else
781       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
782
783     if (argc == 0) {
784       if (binmode_f == TRUE)
785 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
786       if (freopen("","rb",stdin) == NULL) return (-1);
787 #else
788       setbinmode(stdin);
789 #endif
790       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
791       if (nop_f)
792           noconvert(stdin);
793       else {
794           kanji_convert(stdin);
795           if (guess_f) print_guessed_code(NULL);
796       }
797     } else {
798       int nfiles = argc;
799         int is_argument_error = FALSE;
800       while (argc--) {
801             is_inputcode_mixed = FALSE;
802             input_codename = NULL;
803 #ifdef CHECK_OPTION
804             iconv_for_check = 0;
805 #endif
806           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
807               perror(*--argv);
808                 *argv++;
809                 is_argument_error = TRUE;
810                 continue;
811           } else {
812 #ifdef OVERWRITE
813               int fd = 0;
814               int fd_backup = 0;
815 #endif
816
817 /* reopen file for stdout */
818               if (file_out_f == TRUE) {
819 #ifdef OVERWRITE
820                   if (overwrite_f){
821                       outfname = malloc(strlen(origfname)
822                                         + strlen(".nkftmpXXXXXX")
823                                         + 1);
824                       if (!outfname){
825                           perror(origfname);
826                           return -1;
827                       }
828                       strcpy(outfname, origfname);
829 #ifdef MSDOS
830                       {
831                           int i;
832                           for (i = strlen(outfname); i; --i){
833                               if (outfname[i - 1] == '/'
834                                   || outfname[i - 1] == '\\'){
835                                   break;
836                               }
837                           }
838                           outfname[i] = '\0';
839                       }
840                       strcat(outfname, "ntXXXXXX");
841                       mktemp(outfname);
842                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
843                                 S_IREAD | S_IWRITE);
844 #else
845                       strcat(outfname, ".nkftmpXXXXXX");
846                       fd = mkstemp(outfname);
847 #endif
848                       if (fd < 0
849                           || (fd_backup = dup(fileno(stdout))) < 0
850                           || dup2(fd, fileno(stdout)) < 0
851                           ){
852                           perror(origfname);
853                           return -1;
854                       }
855                   }else
856 #endif
857                   if(argc == 1) {
858                       outfname = *argv++;
859                       argc--;
860                   } else {
861                       outfname = "nkf.out";
862                   }
863
864                   if(freopen(outfname, "w", stdout) == NULL) {
865                       perror (outfname);
866                       return (-1);
867                   }
868                   if (binmode_f == TRUE) {
869 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
870                       if (freopen("","wb",stdout) == NULL)
871                            return (-1);
872 #else
873                       setbinmode(stdout);
874 #endif
875                   }
876               }
877               if (binmode_f == TRUE)
878 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
879                  if (freopen("","rb",fin) == NULL)
880                     return (-1);
881 #else
882                  setbinmode(fin);
883 #endif
884               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
885               if (nop_f)
886                   noconvert(fin);
887               else {
888                   char *filename = NULL;
889                   kanji_convert(fin);
890                   if (nfiles > 1) filename = origfname;
891                   if (guess_f) print_guessed_code(filename);
892               }
893               fclose(fin);
894 #ifdef OVERWRITE
895               if (overwrite_f) {
896                   struct stat     sb;
897 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
898                   time_t tb[2];
899 #else
900                   struct utimbuf  tb;
901 #endif
902
903                   fflush(stdout);
904                   close(fd);
905                   if (dup2(fd_backup, fileno(stdout)) < 0){
906                       perror("dup2");
907                   }
908                   if (stat(origfname, &sb)) {
909                       fprintf(stderr, "Can't stat %s\n", origfname);
910                   }
911                   /* restore the permissions */
912                   if (chmod(outfname, sb.st_mode)) {
913                       fprintf(stderr, "Can't set permission %s\n", outfname);
914                   }
915
916                   /* restore the timestamp */
917                     if(preserve_time_f){
918 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
919                         tb[0] = tb[1] = sb.st_mtime;
920                         if (utime(outfname, tb)) {
921                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
922                         }
923 #else
924                         tb.actime  = sb.st_atime;
925                         tb.modtime = sb.st_mtime;
926                         if (utime(outfname, &tb)) {
927                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
928                         }
929 #endif
930                     }
931                     if(backup_f){
932                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
933 #ifdef MSDOS
934                         unlink(backup_filename);
935 #endif
936                         if (rename(origfname, backup_filename)) {
937                             perror(backup_filename);
938                             fprintf(stderr, "Can't rename %s to %s\n",
939                                     origfname, backup_filename);
940                         }
941                     }else{
942 #ifdef MSDOS
943                         if (unlink(origfname)){
944                             perror(origfname);
945                         }
946 #endif
947                     }
948                   if (rename(outfname, origfname)) {
949                       perror(origfname);
950                       fprintf(stderr, "Can't rename %s to %s\n",
951                               outfname, origfname);
952                   }
953                   free(outfname);
954               }
955 #endif
956           }
957       }
958         if (is_argument_error)
959             return(-1);
960     }
961 #ifdef EASYWIN /*Easy Win */
962     if (file_out_f == FALSE)
963         scanf("%d",&end_check);
964     else
965         fclose(stdout);
966 #else /* for Other OS */
967     if (file_out_f == TRUE)
968         fclose(stdout);
969 #endif /*Easy Win */
970     return (0);
971 }
972 #endif /* WIN32DLL */
973
974 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename` according to `suffix`.
 *
 * When `suffix` contains one or more '*' characters, every '*' is replaced
 * by `filename` and the other characters of `suffix` are copied unchanged
 * ("bak/*" with "a.txt" gives "bak/a.txt").  When `suffix` contains no '*',
 * it is appended to `filename` (".bak" with "a.txt" gives "a.txt.bak").
 *
 * Returns a newly malloc'ed, NUL-terminated string that the caller must
 * free, or NULL if the allocation fails (the failure is reported through
 * perror).
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t result_length;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* each '*' is dropped and one copy of filename takes its place */
        result_length = suffix_length - asterisk_count
                        + asterisk_count * filename_length;
    } else {
        result_length = filename_length + suffix_length;
    }

    /* one checked allocation for both layouts, with room for the NUL */
    backup_filename = malloc(result_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
    } else {
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
    }
    backup_filename[result_length] = '\0';
    return backup_filename;
}
1013 #endif
1014
/*
 * Table of the long ("--name") options understood by options().
 *
 *   name  - option text that follows "--".  A trailing '=' marks an option
 *           that carries a value: options() stops comparing at the '=' and
 *           reads the value from the characters after it.
 *   alias - when non-empty, a string of short option letters that options()
 *           parses in place of the long option.  When empty, options()
 *           identifies the entry by comparing `name` with strcmp and handles
 *           it directly.
 *
 * Entries inside #ifdef sections exist only when that feature macro is
 * defined at compile time.
 */
1015 static const struct {
1016     const char *name;
1017     const char *alias;
1018 } long_option[] = {
1019     {"ic=", ""},
1020     {"oc=", ""},
1021     {"base64","jMB"},
1022     {"euc","e"},
1023     {"euc-input","E"},
1024     {"fj","jm"},
1025     {"help","v"},
1026     {"jis","j"},
1027     {"jis-input","J"},
1028     {"mac","sLm"},
1029     {"mime","jM"},
1030     {"mime-input","m"},
1031     {"msdos","sLw"},
1032     {"sjis","s"},
1033     {"sjis-input","S"},
1034     {"unix","eLu"},
1035     {"version","V"},
1036     {"windows","sLw"},
1037     {"hiragana","h1"},
1038     {"katakana","h2"},
1039     {"katakana-hiragana","h3"},
1040     {"guess", "g"},
1041     {"cp932", ""},
1042     {"no-cp932", ""},
1043 #ifdef X0212_ENABLE
1044     {"x0212", ""},
1045 #endif
1046 #ifdef UTF8_OUTPUT_ENABLE
1047     {"utf8", "w"},
1048     {"utf16", "w16"},
1049     {"ms-ucs-map", ""},
1050     {"fb-skip", ""},
1051     {"fb-html", ""},
1052     {"fb-xml", ""},
1053     {"fb-perl", ""},
1054     {"fb-java", ""},
1055     {"fb-subchar", ""},
1056     {"fb-subchar=", ""},
1057 #endif
1058 #ifdef UTF8_INPUT_ENABLE
1059     {"utf8-input", "W"},
1060     {"utf16-input", "W16"},
1061     {"no-cp932ext", ""},
1062     {"no-best-fit-chars",""},
1063 #endif
1064 #ifdef UNICODE_NORMALIZATION
1065     {"utf8mac-input", ""},
1066 #endif
1067 #ifdef OVERWRITE
1068     {"overwrite", ""},
1069     {"overwrite=", ""},
1070     {"in-place", ""},
1071     {"in-place=", ""},
1072 #endif
1073 #ifdef INPUT_OPTION
1074     {"cap-input", ""},
1075     {"url-input", ""},
1076 #endif
1077 #ifdef NUMCHAR_OPTION
1078     {"numchar-input", ""},
1079 #endif
1080 #ifdef CHECK_OPTION
1081     {"no-output", ""},
1082     {"debug", ""},
1083 #endif
1084 #ifdef SHIFTJIS_CP932
1085     {"cp932inv", ""},
1086 #endif
1087 #ifdef EXEC_IO
1088     {"exec-in", ""},
1089     {"exec-out", ""},
1090 #endif
1091     {"prefix=", ""},
1092 };
1093
/*
 * Set to 1 by options() when it meets a bare "--" (nothing, or a space,
 * after the second '-').  While it is 1, options() returns immediately
 * without parsing its argument.
 */
1094 static int option_mode = 0;
1095
1096 void options(unsigned char *cp)
1097 {
1098     nkf_char i, j;
1099     unsigned char *p;
1100     unsigned char *cp_back = NULL;
1101     char codeset[32];
1102
1103     if (option_mode==1)
1104         return;
1105     while(*cp && *cp++!='-');
1106     while (*cp || cp_back) {
1107         if(!*cp){
1108             cp = cp_back;
1109             cp_back = NULL;
1110             continue;
1111         }
1112         p = 0;
1113         switch (*cp++) {
1114         case '-':  /* literal options */
1115             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1116                 option_mode = 1;
1117                 return;
1118             }
1119             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1120                 p = (unsigned char *)long_option[i].name;
1121                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1122                 if (*p == cp[j] || cp[j] == SP){
1123                     p = &cp[j] + 1;
1124                     break;
1125                 }
1126                 p = 0;
1127             }
1128             if (p == 0) return;
1129             while(*cp && *cp != SP && cp++);
1130             if (long_option[i].alias[0]){
1131                 cp_back = cp;
1132                 cp = (unsigned char *)long_option[i].alias;
1133             }else{
1134                 if (strcmp(long_option[i].name, "ic=") == 0){
1135                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1136                         codeset[i] = nkf_toupper(p[i]);
1137                     }
1138                     codeset[i] = 0;
1139                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1140                         input_f = JIS_INPUT;
1141                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1142                       strcmp(codeset, "CP50220") == 0 ||
1143                       strcmp(codeset, "CP50221") == 0 ||
1144                       strcmp(codeset, "CP50222") == 0){
1145                         input_f = JIS_INPUT;
1146 #ifdef SHIFTJIS_CP932
1147                         cp51932_f = TRUE;
1148 #endif
1149 #ifdef UTF8_OUTPUT_ENABLE
1150                         ms_ucs_map_f = UCS_MAP_CP932;
1151 #endif
1152                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1153                         input_f = JIS_INPUT;
1154 #ifdef X0212_ENABLE
1155                         x0212_f = TRUE;
1156 #endif
1157                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1158                         input_f = JIS_INPUT;
1159 #ifdef X0212_ENABLE
1160                         x0212_f = TRUE;
1161 #endif
1162                         x0213_f = TRUE;
1163                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1164                         input_f = SJIS_INPUT;
1165                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1166                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1167                              strcmp(codeset, "CP932") == 0 ||
1168                              strcmp(codeset, "MS932") == 0){
1169                         input_f = SJIS_INPUT;
1170 #ifdef SHIFTJIS_CP932
1171                         cp51932_f = TRUE;
1172 #endif
1173 #ifdef UTF8_OUTPUT_ENABLE
1174                         ms_ucs_map_f = UCS_MAP_CP932;
1175 #endif
1176                     }else if(strcmp(codeset, "CP10001") == 0){
1177                         input_f = SJIS_INPUT;
1178 #ifdef SHIFTJIS_CP932
1179                         cp51932_f = TRUE;
1180 #endif
1181 #ifdef UTF8_OUTPUT_ENABLE
1182                         ms_ucs_map_f = UCS_MAP_CP10001;
1183 #endif
1184                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1185                              strcmp(codeset, "EUC-JP") == 0){
1186                         input_f = EUC_INPUT;
1187                     }else if(strcmp(codeset, "CP51932") == 0){
1188                         input_f = EUC_INPUT;
1189 #ifdef SHIFTJIS_CP932
1190                         cp51932_f = TRUE;
1191 #endif
1192 #ifdef UTF8_OUTPUT_ENABLE
1193                         ms_ucs_map_f = UCS_MAP_CP932;
1194 #endif
1195                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1196                              strcmp(codeset, "EUCJP-MS") == 0 ||
1197                              strcmp(codeset, "EUCJPMS") == 0){
1198                         input_f = EUC_INPUT;
1199 #ifdef SHIFTJIS_CP932
1200                         cp51932_f = FALSE;
1201 #endif
1202 #ifdef UTF8_OUTPUT_ENABLE
1203                         ms_ucs_map_f = UCS_MAP_MS;
1204 #endif
1205                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1206                              strcmp(codeset, "EUCJP-ASCII") == 0){
1207                         input_f = EUC_INPUT;
1208 #ifdef SHIFTJIS_CP932
1209                         cp51932_f = FALSE;
1210 #endif
1211 #ifdef UTF8_OUTPUT_ENABLE
1212                         ms_ucs_map_f = UCS_MAP_ASCII;
1213 #endif
1214                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1215                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1216                         input_f = SJIS_INPUT;
1217                         x0213_f = TRUE;
1218 #ifdef SHIFTJIS_CP932
1219                         cp51932_f = FALSE;
1220 #endif
1221                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1222                              strcmp(codeset, "EUC-JIS-2004") == 0){
1223                         input_f = EUC_INPUT;
1224                         x0213_f = TRUE;
1225 #ifdef SHIFTJIS_CP932
1226                         cp51932_f = FALSE;
1227 #endif
1228 #ifdef UTF8_INPUT_ENABLE
1229                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1230                              strcmp(codeset, "UTF-8N") == 0 ||
1231                              strcmp(codeset, "UTF-8-BOM") == 0){
1232                         input_f = UTF8_INPUT;
1233 #ifdef UNICODE_NORMALIZATION
1234                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1235                              strcmp(codeset, "UTF-8-MAC") == 0){
1236                         input_f = UTF8_INPUT;
1237                         nfc_f = TRUE;
1238 #endif
1239                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1240                              strcmp(codeset, "UTF-16BE") == 0 ||
1241                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1242                         input_f = UTF16_INPUT;
1243                         input_endian = ENDIAN_BIG;
1244                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1245                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1246                         input_f = UTF16_INPUT;
1247                         input_endian = ENDIAN_LITTLE;
1248                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1249                              strcmp(codeset, "UTF-32BE") == 0 ||
1250                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1251                         input_f = UTF32_INPUT;
1252                         input_endian = ENDIAN_BIG;
1253                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1254                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1255                         input_f = UTF32_INPUT;
1256                         input_endian = ENDIAN_LITTLE;
1257 #endif
1258                     }
1259                     continue;
1260                 }
1261                 if (strcmp(long_option[i].name, "oc=") == 0){
1262                     x0201_f = FALSE;
1263                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1264                         codeset[i] = nkf_toupper(p[i]);
1265                     }
1266                     codeset[i] = 0;
1267                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1268                         output_conv = j_oconv;
1269                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1270                         output_conv = j_oconv;
1271                         no_cp932ext_f = TRUE;
1272 #ifdef SHIFTJIS_CP932
1273                         cp932inv_f = FALSE;
1274 #endif
1275 #ifdef UTF8_OUTPUT_ENABLE
1276                         ms_ucs_map_f = UCS_MAP_CP932;
1277 #endif
1278                     }else if(strcmp(codeset, "CP50220") == 0){
1279                         output_conv = j_oconv;
1280                         x0201_f = TRUE;
1281 #ifdef SHIFTJIS_CP932
1282                         cp932inv_f = FALSE;
1283 #endif
1284 #ifdef UTF8_OUTPUT_ENABLE
1285                         ms_ucs_map_f = UCS_MAP_CP932;
1286 #endif
1287                     }else if(strcmp(codeset, "CP50221") == 0){
1288                         output_conv = j_oconv;
1289 #ifdef SHIFTJIS_CP932
1290                         cp932inv_f = FALSE;
1291 #endif
1292 #ifdef UTF8_OUTPUT_ENABLE
1293                         ms_ucs_map_f = UCS_MAP_CP932;
1294 #endif
1295                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1296                         output_conv = j_oconv;
1297 #ifdef X0212_ENABLE
1298                         x0212_f = TRUE;
1299 #endif
1300 #ifdef SHIFTJIS_CP932
1301                         cp932inv_f = FALSE;
1302 #endif
1303                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1304                         output_conv = j_oconv;
1305 #ifdef X0212_ENABLE
1306                         x0212_f = TRUE;
1307 #endif
1308                         x0213_f = TRUE;
1309 #ifdef SHIFTJIS_CP932
1310                         cp932inv_f = FALSE;
1311 #endif
1312                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1313                         output_conv = s_oconv;
1314                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1315                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1316                              strcmp(codeset, "CP932") == 0 ||
1317                              strcmp(codeset, "MS932") == 0){
1318                         output_conv = s_oconv;
1319 #ifdef UTF8_OUTPUT_ENABLE
1320                         ms_ucs_map_f = UCS_MAP_CP932;
1321 #endif
1322                     }else if(strcmp(codeset, "CP10001") == 0){
1323                         output_conv = s_oconv;
1324 #ifdef UTF8_OUTPUT_ENABLE
1325                         ms_ucs_map_f = UCS_MAP_CP10001;
1326 #endif
1327                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1328                              strcmp(codeset, "EUC-JP") == 0){
1329                         output_conv = e_oconv;
1330                     }else if(strcmp(codeset, "CP51932") == 0){
1331                         output_conv = e_oconv;
1332 #ifdef SHIFTJIS_CP932
1333                         cp932inv_f = FALSE;
1334 #endif
1335 #ifdef UTF8_OUTPUT_ENABLE
1336                         ms_ucs_map_f = UCS_MAP_CP932;
1337 #endif
1338                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1339                              strcmp(codeset, "EUCJP-MS") == 0 ||
1340                              strcmp(codeset, "EUCJPMS") == 0){
1341                         output_conv = e_oconv;
1342 #ifdef X0212_ENABLE
1343                         x0212_f = TRUE;
1344 #endif
1345 #ifdef UTF8_OUTPUT_ENABLE
1346                         ms_ucs_map_f = UCS_MAP_MS;
1347 #endif
1348                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1349                              strcmp(codeset, "EUCJP-ASCII") == 0){
1350                         output_conv = e_oconv;
1351 #ifdef X0212_ENABLE
1352                         x0212_f = TRUE;
1353 #endif
1354 #ifdef UTF8_OUTPUT_ENABLE
1355                         ms_ucs_map_f = UCS_MAP_ASCII;
1356 #endif
1357                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1358                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1359                         output_conv = s_oconv;
1360                         x0213_f = TRUE;
1361 #ifdef SHIFTJIS_CP932
1362                         cp932inv_f = FALSE;
1363 #endif
1364                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1365                              strcmp(codeset, "EUC-JIS-2004") == 0){
1366                         output_conv = e_oconv;
1367 #ifdef X0212_ENABLE
1368                         x0212_f = TRUE;
1369 #endif
1370                         x0213_f = TRUE;
1371 #ifdef SHIFTJIS_CP932
1372                         cp932inv_f = FALSE;
1373 #endif
1374 #ifdef UTF8_OUTPUT_ENABLE
1375                     }else if(strcmp(codeset, "UTF-8") == 0){
1376                         output_conv = w_oconv;
1377                     }else if(strcmp(codeset, "UTF-8N") == 0){
1378                         output_conv = w_oconv;
1379                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1380                         output_conv = w_oconv;
1381                         output_bom_f = TRUE;
1382                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1383                         output_conv = w_oconv16;
1384                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1385                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1386                         output_conv = w_oconv16;
1387                         output_bom_f = TRUE;
1388                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1389                         output_conv = w_oconv16;
1390                         output_endian = ENDIAN_LITTLE;
1391                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1392                         output_conv = w_oconv16;
1393                         output_endian = ENDIAN_LITTLE;
1394                         output_bom_f = TRUE;
1395                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1396                              strcmp(codeset, "UTF-32BE") == 0){
1397                         output_conv = w_oconv32;
1398                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1399                         output_conv = w_oconv32;
1400                         output_bom_f = TRUE;
1401                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1402                         output_conv = w_oconv32;
1403                         output_endian = ENDIAN_LITTLE;
1404                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1405                         output_conv = w_oconv32;
1406                         output_endian = ENDIAN_LITTLE;
1407                         output_bom_f = TRUE;
1408 #endif
1409                     }
1410                     continue;
1411                 }
1412 #ifdef OVERWRITE
1413                 if (strcmp(long_option[i].name, "overwrite") == 0){
1414                     file_out_f = TRUE;
1415                     overwrite_f = TRUE;
1416                     preserve_time_f = TRUE;
1417                     continue;
1418                 }
1419                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1420                     file_out_f = TRUE;
1421                     overwrite_f = TRUE;
1422                     preserve_time_f = TRUE;
1423                     backup_f = TRUE;
1424                     backup_suffix = malloc(strlen((char *) p) + 1);
1425                     strcpy(backup_suffix, (char *) p);
1426                     continue;
1427                 }
1428                 if (strcmp(long_option[i].name, "in-place") == 0){
1429                     file_out_f = TRUE;
1430                     overwrite_f = TRUE;
1431                     preserve_time_f = FALSE;
1432                     continue;
1433                 }
1434                 if (strcmp(long_option[i].name, "in-place=") == 0){
1435                     file_out_f = TRUE;
1436                     overwrite_f = TRUE;
1437                     preserve_time_f = FALSE;
1438                     backup_f = TRUE;
1439                     backup_suffix = malloc(strlen((char *) p) + 1);
1440                     strcpy(backup_suffix, (char *) p);
1441                     continue;
1442                 }
1443 #endif
1444 #ifdef INPUT_OPTION
1445                 if (strcmp(long_option[i].name, "cap-input") == 0){
1446                     cap_f = TRUE;
1447                     continue;
1448                 }
1449                 if (strcmp(long_option[i].name, "url-input") == 0){
1450                     url_f = TRUE;
1451                     continue;
1452                 }
1453 #endif
1454 #ifdef NUMCHAR_OPTION
1455                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1456                     numchar_f = TRUE;
1457                     continue;
1458                 }
1459 #endif
1460 #ifdef CHECK_OPTION
1461                 if (strcmp(long_option[i].name, "no-output") == 0){
1462                     noout_f = TRUE;
1463                     continue;
1464                 }
1465                 if (strcmp(long_option[i].name, "debug") == 0){
1466                     debug_f = TRUE;
1467                     continue;
1468                 }
1469 #endif
1470                 if (strcmp(long_option[i].name, "cp932") == 0){
1471 #ifdef SHIFTJIS_CP932
1472                     cp51932_f = TRUE;
1473                     cp932inv_f = TRUE;
1474 #endif
1475 #ifdef UTF8_OUTPUT_ENABLE
1476                     ms_ucs_map_f = UCS_MAP_CP932;
1477 #endif
1478                     continue;
1479                 }
1480                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1481 #ifdef SHIFTJIS_CP932
1482                     cp51932_f = FALSE;
1483                     cp932inv_f = FALSE;
1484 #endif
1485 #ifdef UTF8_OUTPUT_ENABLE
1486                     ms_ucs_map_f = UCS_MAP_ASCII;
1487 #endif
1488                     continue;
1489                 }
1490 #ifdef SHIFTJIS_CP932
1491                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1492                     cp932inv_f = TRUE;
1493                     continue;
1494                 }
1495 #endif
1496
1497 #ifdef X0212_ENABLE
1498                 if (strcmp(long_option[i].name, "x0212") == 0){
1499                     x0212_f = TRUE;
1500                     continue;
1501                 }
1502 #endif
1503
1504 #ifdef EXEC_IO
1505                   if (strcmp(long_option[i].name, "exec-in") == 0){
1506                       exec_f = 1;
1507                       return;
1508                   }
1509                   if (strcmp(long_option[i].name, "exec-out") == 0){
1510                       exec_f = -1;
1511                       return;
1512                   }
1513 #endif
1514 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1515                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1516                     no_cp932ext_f = TRUE;
1517                     continue;
1518                 }
1519                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1520                     no_best_fit_chars_f = TRUE;
1521                     continue;
1522                 }
1523                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1524                     encode_fallback = NULL;
1525                     continue;
1526                 }
1527                 if (strcmp(long_option[i].name, "fb-html") == 0){
1528                     encode_fallback = encode_fallback_html;
1529                     continue;
1530                 }
1531                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1532                     encode_fallback = encode_fallback_xml;
1533                     continue;
1534                 }
1535                 if (strcmp(long_option[i].name, "fb-java") == 0){
1536                     encode_fallback = encode_fallback_java;
1537                     continue;
1538                 }
1539                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1540                     encode_fallback = encode_fallback_perl;
1541                     continue;
1542                 }
1543                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1544                     encode_fallback = encode_fallback_subchar;
1545                     continue;
1546                 }
1547                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1548                     encode_fallback = encode_fallback_subchar;
1549                     unicode_subchar = 0;
1550                     if (p[0] != '0'){
1551                         /* decimal number */
1552                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1553                             unicode_subchar *= 10;
1554                             unicode_subchar += hex2bin(p[i]);
1555                         }
1556                     }else if(p[1] == 'x' || p[1] == 'X'){
1557                         /* hexadecimal number */
1558                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1559                             unicode_subchar <<= 4;
1560                             unicode_subchar |= hex2bin(p[i]);
1561                         }
1562                     }else{
1563                         /* octal number */
1564                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1565                             unicode_subchar *= 8;
1566                             unicode_subchar += hex2bin(p[i]);
1567                         }
1568                     }
1569                     w16e_conv(unicode_subchar, &i, &j);
1570                     unicode_subchar = i<<8 | j;
1571                     continue;
1572                 }
1573 #endif
1574 #ifdef UTF8_OUTPUT_ENABLE
1575                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1576                     ms_ucs_map_f = UCS_MAP_MS;
1577                     continue;
1578                 }
1579 #endif
1580 #ifdef UNICODE_NORMALIZATION
1581                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1582                     input_f = UTF8_INPUT;
1583                     nfc_f = TRUE;
1584                     continue;
1585                 }
1586 #endif
1587                 if (strcmp(long_option[i].name, "prefix=") == 0){
1588                     if (nkf_isgraph(p[0])){
1589                         for (i = 1; nkf_isgraph(p[i]); i++){
1590                             prefix_table[p[i]] = p[0];
1591                         }
1592                     }
1593                     continue;
1594                 }
1595             }
1596             continue;
1597         case 'b':           /* buffered mode */
1598             unbuf_f = FALSE;
1599             continue;
1600         case 'u':           /* non bufferd mode */
1601             unbuf_f = TRUE;
1602             continue;
1603         case 't':           /* transparent mode */
1604             if (*cp=='1') {
1605                 /* alias of -t */
1606                 nop_f = TRUE;
1607                 *cp++;
1608             } else if (*cp=='2') {
1609                 /*
1610                  * -t with put/get
1611                  *
1612                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1613                  *
1614                  */
1615                 nop_f = 2;
1616                 *cp++;
1617             } else
1618                 nop_f = TRUE;
1619             continue;
1620         case 'j':           /* JIS output */
1621         case 'n':
1622             output_conv = j_oconv;
1623             continue;
1624         case 'e':           /* AT&T EUC output */
1625             output_conv = e_oconv;
1626             cp932inv_f = FALSE;
1627             continue;
1628         case 's':           /* SJIS output */
1629             output_conv = s_oconv;
1630             continue;
1631         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1632             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1633             input_f = LATIN1_INPUT;
1634             continue;
1635         case 'i':           /* Kanji IN ESC-$-@/B */
1636             if (*cp=='@'||*cp=='B')
1637                 kanji_intro = *cp++;
1638             continue;
1639         case 'o':           /* ASCII IN ESC-(-J/B */
1640             if (*cp=='J'||*cp=='B'||*cp=='H')
1641                 ascii_intro = *cp++;
1642             continue;
1643         case 'h':
1644             /*
1645                 bit:1   katakana->hiragana
1646                 bit:2   hiragana->katakana
1647             */
1648             if ('9'>= *cp && *cp>='0')
1649                 hira_f |= (*cp++ -'0');
1650             else
1651                 hira_f |= 1;
1652             continue;
1653         case 'r':
1654             rot_f = TRUE;
1655             continue;
1656 #if defined(MSDOS) || defined(__OS2__)
1657         case 'T':
1658             binmode_f = FALSE;
1659             continue;
1660 #endif
1661 #ifndef PERL_XS
1662         case 'V':
1663             version();
1664             exit(1);
1665             break;
1666         case 'v':
1667             usage();
1668             exit(1);
1669             break;
1670 #endif
1671 #ifdef UTF8_OUTPUT_ENABLE
1672         case 'w':           /* UTF-8 output */
1673             if (cp[0] == '8') {
1674                 output_conv = w_oconv; cp++;
1675                 if (cp[0] == '0'){
1676                     cp++;
1677                 } else {
1678                     output_bom_f = TRUE;
1679                 }
1680             } else {
1681                 if ('1'== cp[0] && '6'==cp[1]) {
1682                     output_conv = w_oconv16; cp+=2;
1683                 } else if ('3'== cp[0] && '2'==cp[1]) {
1684                     output_conv = w_oconv32; cp+=2;
1685                 } else {
1686                     output_conv = w_oconv;
1687                     continue;
1688                 }
1689                 if (cp[0]=='L') {
1690                     cp++;
1691                     output_endian = ENDIAN_LITTLE;
1692                 } else if (cp[0] == 'B') {
1693                     cp++;
1694                 } else {
1695                     continue;
1696                 }
1697                 if (cp[0] == '0'){
1698                     cp++;
1699                 } else {
1700                     output_bom_f = TRUE;
1701                 }
1702             }
1703             continue;
1704 #endif
1705 #ifdef UTF8_INPUT_ENABLE
1706         case 'W':           /* UTF input */
1707             if (cp[0] == '8') {
1708                 cp++;
1709                 input_f = UTF8_INPUT;
1710             }else{
1711                 if ('1'== cp[0] && '6'==cp[1]) {
1712                     cp += 2;
1713                     input_f = UTF16_INPUT;
1714                     input_endian = ENDIAN_BIG;
1715                 } else if ('3'== cp[0] && '2'==cp[1]) {
1716                     cp += 2;
1717                     input_f = UTF32_INPUT;
1718                     input_endian = ENDIAN_BIG;
1719                 } else {
1720                     input_f = UTF8_INPUT;
1721                     continue;
1722                 }
1723                 if (cp[0]=='L') {
1724                     cp++;
1725                     input_endian = ENDIAN_LITTLE;
1726                 } else if (cp[0] == 'B') {
1727                     cp++;
1728                 }
1729             }
1730             continue;
1731 #endif
1732         /* Input code assumption */
1733         case 'J':   /* JIS input */
1734             input_f = JIS_INPUT;
1735             continue;
1736         case 'E':   /* AT&T EUC input */
1737             input_f = EUC_INPUT;
1738             continue;
1739         case 'S':   /* MS Kanji input */
1740             input_f = SJIS_INPUT;
1741             if (x0201_f==NO_X0201) x0201_f=TRUE;
1742             continue;
1743         case 'Z':   /* Convert X0208 alphabet to asii */
1744             /* alpha_f
1745                bit:0   Convert JIS X 0208 Alphabet to ASCII
1746                bit:1   Convert Kankaku to one space
1747                bit:2   Convert Kankaku to two spaces
1748                bit:3   Convert HTML Entity
1749                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1750             */
1751             while ('0'<= *cp && *cp <='9') {
1752                 alpha_f |= 1 << (*cp++ - '0');
1753             }
1754             if (!alpha_f) alpha_f = 1;
1755             continue;
1756         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1757             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1758             /* accept  X0201
1759                     ESC-(-I     in JIS, EUC, MS Kanji
1760                     SI/SO       in JIS, EUC, MS Kanji
1761                     SSO         in EUC, JIS, not in MS Kanji
1762                     MS Kanji (0xa0-0xdf)
1763                output  X0201
1764                     ESC-(-I     in JIS (0x20-0x5f)
1765                     SSO         in EUC (0xa0-0xdf)
1766                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1767             */
1768             continue;
1769         case 'X':   /* Assume X0201 kana */
1770             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1771             x0201_f = TRUE;
1772             continue;
1773         case 'F':   /* prserve new lines */
1774             fold_preserve_f = TRUE;
1775         case 'f':   /* folding -f60 or -f */
1776             fold_f = TRUE;
1777             fold_len = 0;
1778             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1779                 fold_len *= 10;
1780                 fold_len += *cp++ - '0';
1781             }
1782             if (!(0<fold_len && fold_len<BUFSIZ))
1783                 fold_len = DEFAULT_FOLD;
1784             if (*cp=='-') {
1785                 fold_margin = 0;
1786                 cp++;
1787                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1788                     fold_margin *= 10;
1789                     fold_margin += *cp++ - '0';
1790                 }
1791             }
1792             continue;
1793         case 'm':   /* MIME support */
1794             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1795             if (*cp=='B'||*cp=='Q') {
1796                 mime_decode_mode = *cp++;
1797                 mimebuf_f = FIXED_MIME;
1798             } else if (*cp=='N') {
1799                 mime_f = TRUE; cp++;
1800             } else if (*cp=='S') {
1801                 mime_f = STRICT_MIME; cp++;
1802             } else if (*cp=='0') {
1803                 mime_decode_f = FALSE;
1804                 mime_f = FALSE; cp++;
1805             }
1806             continue;
1807         case 'M':   /* MIME output */
1808             if (*cp=='B') {
1809                 mimeout_mode = 'B';
1810                 mimeout_f = FIXED_MIME; cp++;
1811             } else if (*cp=='Q') {
1812                 mimeout_mode = 'Q';
1813                 mimeout_f = FIXED_MIME; cp++;
1814             } else {
1815                 mimeout_f = TRUE;
1816             }
1817             continue;
1818         case 'B':   /* Broken JIS support */
1819             /*  bit:0   no ESC JIS
1820                 bit:1   allow any x on ESC-(-x or ESC-$-x
1821                 bit:2   reset to ascii on NL
1822             */
1823             if ('9'>= *cp && *cp>='0')
1824                 broken_f |= 1<<(*cp++ -'0');
1825             else
1826                 broken_f |= TRUE;
1827             continue;
1828 #ifndef PERL_XS
1829         case 'O':/* for Output file */
1830             file_out_f = TRUE;
1831             continue;
1832 #endif
1833         case 'c':/* add cr code */
1834             nlmode_f = CRLF;
1835             continue;
1836         case 'd':/* delete cr code */
1837             nlmode_f = LF;
1838             continue;
1839         case 'I':   /* ISO-2022-JP output */
1840             iso2022jp_f = TRUE;
1841             continue;
1842         case 'L':  /* line mode */
1843             if (*cp=='u') {         /* unix */
1844                 nlmode_f = LF; cp++;
1845             } else if (*cp=='m') { /* mac */
1846                 nlmode_f = CR; cp++;
1847             } else if (*cp=='w') { /* windows */
1848                 nlmode_f = CRLF; cp++;
1849             } else if (*cp=='0') { /* no conversion  */
1850                 nlmode_f = 0; cp++;
1851             }
1852             continue;
1853         case 'g':
1854 #ifndef PERL_XS
1855             guess_f = TRUE;
1856 #endif
1857             continue;
1858         case SP:
1859         /* module muliple options in a string are allowed for Perl moudle  */
1860             while(*cp && *cp++!='-');
1861             continue;
1862         default:
1863             /* bogus option but ignored */
1864             continue;
1865         }
1866     }
1867 }
1868
1869 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1870 {
1871     if (iconv_func){
1872         struct input_code *p = input_code_list;
1873         while (p->name){
1874             if (iconv_func == p->iconv_func){
1875                 return p;
1876             }
1877             p++;
1878         }
1879     }
1880     return 0;
1881 }
1882
1883 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1884 {
1885 #ifdef INPUT_CODE_FIX
1886     if (f || !input_f)
1887 #endif
1888         if (estab_f != f){
1889             estab_f = f;
1890         }
1891
1892     if (iconv_func
1893 #ifdef INPUT_CODE_FIX
1894         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1895 #endif
1896         ){
1897         iconv = iconv_func;
1898     }
1899 #ifdef CHECK_OPTION
1900     if (estab_f && iconv_for_check != iconv){
1901         struct input_code *p = find_inputcode_byfunc(iconv);
1902         if (p){
1903             set_input_codename(p->name);
1904             debug(p->name);
1905         }
1906         iconv_for_check = iconv;
1907     }
1908 #endif
1909 }
1910
/* Bit flags that set_code_score()/clr_code_score() combine into input_code.score. */
1911 #define SCORE_L2       (1)                   /* JIS level-2 kanji */
1912 #define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
1913 #define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
1914 #ifdef SHIFTJIS_CP932
1915 #define SCORE_CP932    (SCORE_DEPEND << 1)   /* character reinterpreted through CP932 */
1916 #define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* character that does not exist */
1917 #else
1918 #define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* character that does not exist */
1919 #endif
1920 #define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
1921 #define SCORE_ERROR    (SCORE_iMIME << 1) /* error */
1922
/* Score assigned to a candidate by status_reset(). */
1923 #define SCORE_INIT (SCORE_iMIME)
1924
/* Used by code_score() when (c2 & 0x70) == 0x20; indexed by c2 & 0x0f. */
1925 static const char score_table_A0[] = {
1926     0, 0, 0, 0,
1927     0, 0, 0, 0,
1928     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1929     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1930 };
1931
/* Used by code_score() when (c2 & 0x70) == 0x70; indexed by c2 & 0x0f. */
1932 static const char score_table_F0[] = {
1933     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1934     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1935     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1936     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1937 };
1938
1939 void set_code_score(struct input_code *ptr, nkf_char score)
1940 {
1941     if (ptr){
1942         ptr->score |= score;
1943     }
1944 }
1945
1946 void clr_code_score(struct input_code *ptr, nkf_char score)
1947 {
1948     if (ptr){
1949         ptr->score &= ~score;
1950     }
1951 }
1952
1953 void code_score(struct input_code *ptr)
1954 {
1955     nkf_char c2 = ptr->buf[0];
1956 #ifdef UTF8_OUTPUT_ENABLE
1957     nkf_char c1 = ptr->buf[1];
1958 #endif
1959     if (c2 < 0){
1960         set_code_score(ptr, SCORE_ERROR);
1961     }else if (c2 == SSO){
1962         set_code_score(ptr, SCORE_KANA);
1963 #ifdef UTF8_OUTPUT_ENABLE
1964     }else if (!e2w_conv(c2, c1)){
1965         set_code_score(ptr, SCORE_NO_EXIST);
1966 #endif
1967     }else if ((c2 & 0x70) == 0x20){
1968         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1969     }else if ((c2 & 0x70) == 0x70){
1970         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1971     }else if ((c2 & 0x70) >= 0x50){
1972         set_code_score(ptr, SCORE_L2);
1973     }
1974 }
1975
1976 void status_disable(struct input_code *ptr)
1977 {
1978     ptr->stat = -1;
1979     ptr->buf[0] = -1;
1980     code_score(ptr);
1981     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1982 }
1983
1984 void status_push_ch(struct input_code *ptr, nkf_char c)
1985 {
1986     ptr->buf[ptr->index++] = c;
1987 }
1988
1989 void status_clear(struct input_code *ptr)
1990 {
1991     ptr->stat = 0;
1992     ptr->index = 0;
1993 }
1994
1995 void status_reset(struct input_code *ptr)
1996 {
1997     status_clear(ptr);
1998     ptr->score = SCORE_INIT;
1999 }
2000
2001 void status_reinit(struct input_code *ptr)
2002 {
2003     status_reset(ptr);
2004     ptr->_file_stat = 0;
2005 }
2006
2007 void status_check(struct input_code *ptr, nkf_char c)
2008 {
2009     if (c <= DEL && estab_f){
2010         status_reset(ptr);
2011     }
2012 }
2013
2014 void s_status(struct input_code *ptr, nkf_char c)
2015 {
2016     switch(ptr->stat){
2017       case -1:
2018           status_check(ptr, c);
2019           break;
2020       case 0:
2021           if (c <= DEL){
2022               break;
2023 #ifdef NUMCHAR_OPTION
2024           }else if (is_unicode_capsule(c)){
2025               break;
2026 #endif
2027           }else if (0xa1 <= c && c <= 0xdf){
2028               status_push_ch(ptr, SSO);
2029               status_push_ch(ptr, c);
2030               code_score(ptr);
2031               status_clear(ptr);
2032           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2033               ptr->stat = 1;
2034               status_push_ch(ptr, c);
2035 #ifdef SHIFTJIS_CP932
2036           }else if (cp51932_f
2037                     && is_ibmext_in_sjis(c)){
2038               ptr->stat = 2;
2039               status_push_ch(ptr, c);
2040 #endif /* SHIFTJIS_CP932 */
2041 #ifdef X0212_ENABLE
2042           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2043               ptr->stat = 1;
2044               status_push_ch(ptr, c);
2045 #endif /* X0212_ENABLE */
2046           }else{
2047               status_disable(ptr);
2048           }
2049           break;
2050       case 1:
2051           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2052               status_push_ch(ptr, c);
2053               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2054               code_score(ptr);
2055               status_clear(ptr);
2056           }else{
2057               status_disable(ptr);
2058           }
2059           break;
2060       case 2:
2061 #ifdef SHIFTJIS_CP932
2062           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2063               status_push_ch(ptr, c);
2064               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2065                   set_code_score(ptr, SCORE_CP932);
2066                   status_clear(ptr);
2067                   break;
2068               }
2069           }
2070 #endif /* SHIFTJIS_CP932 */
2071 #ifndef X0212_ENABLE
2072           status_disable(ptr);
2073 #endif
2074           break;
2075     }
2076 }
2077
2078 void e_status(struct input_code *ptr, nkf_char c)
2079 {
2080     switch (ptr->stat){
2081       case -1:
2082           status_check(ptr, c);
2083           break;
2084       case 0:
2085           if (c <= DEL){
2086               break;
2087 #ifdef NUMCHAR_OPTION
2088           }else if (is_unicode_capsule(c)){
2089               break;
2090 #endif
2091           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2092               ptr->stat = 1;
2093               status_push_ch(ptr, c);
2094 #ifdef X0212_ENABLE
2095           }else if (0x8f == c){
2096               ptr->stat = 2;
2097               status_push_ch(ptr, c);
2098 #endif /* X0212_ENABLE */
2099           }else{
2100               status_disable(ptr);
2101           }
2102           break;
2103       case 1:
2104           if (0xa1 <= c && c <= 0xfe){
2105               status_push_ch(ptr, c);
2106               code_score(ptr);
2107               status_clear(ptr);
2108           }else{
2109               status_disable(ptr);
2110           }
2111           break;
2112 #ifdef X0212_ENABLE
2113       case 2:
2114           if (0xa1 <= c && c <= 0xfe){
2115               ptr->stat = 1;
2116               status_push_ch(ptr, c);
2117           }else{
2118               status_disable(ptr);
2119           }
2120 #endif /* X0212_ENABLE */
2121     }
2122 }
2123
2124 #ifdef UTF8_INPUT_ENABLE
2125 void w_status(struct input_code *ptr, nkf_char c)
2126 {
2127     switch (ptr->stat){
2128       case -1:
2129           status_check(ptr, c);
2130           break;
2131       case 0:
2132           if (c <= DEL){
2133               break;
2134 #ifdef NUMCHAR_OPTION
2135           }else if (is_unicode_capsule(c)){
2136               break;
2137 #endif
2138           }else if (0xc0 <= c && c <= 0xdf){
2139               ptr->stat = 1;
2140               status_push_ch(ptr, c);
2141           }else if (0xe0 <= c && c <= 0xef){
2142               ptr->stat = 2;
2143               status_push_ch(ptr, c);
2144           }else if (0xf0 <= c && c <= 0xf4){
2145               ptr->stat = 3;
2146               status_push_ch(ptr, c);
2147           }else{
2148               status_disable(ptr);
2149           }
2150           break;
2151       case 1:
2152       case 2:
2153           if (0x80 <= c && c <= 0xbf){
2154               status_push_ch(ptr, c);
2155               if (ptr->index > ptr->stat){
2156                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2157                              && ptr->buf[2] == 0xbf);
2158                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2159                            &ptr->buf[0], &ptr->buf[1]);
2160                   if (!bom){
2161                       code_score(ptr);
2162                   }
2163                   status_clear(ptr);
2164               }
2165           }else{
2166               status_disable(ptr);
2167           }
2168           break;
2169       case 3:
2170         if (0x80 <= c && c <= 0xbf){
2171             if (ptr->index < ptr->stat){
2172                 status_push_ch(ptr, c);
2173             } else {
2174                 status_clear(ptr);
2175             }
2176           }else{
2177               status_disable(ptr);
2178           }
2179           break;
2180     }
2181 }
2182 #endif
2183
2184 void code_status(nkf_char c)
2185 {
2186     int action_flag = 1;
2187     struct input_code *result = 0;
2188     struct input_code *p = input_code_list;
2189     while (p->name){
2190         if (!p->status_func) {
2191             ++p;
2192             continue;
2193         }
2194         if (!p->status_func)
2195             continue;
2196         (p->status_func)(p, c);
2197         if (p->stat > 0){
2198             action_flag = 0;
2199         }else if(p->stat == 0){
2200             if (result){
2201                 action_flag = 0;
2202             }else{
2203                 result = p;
2204             }
2205         }
2206         ++p;
2207     }
2208
2209     if (action_flag){
2210         if (result && !estab_f){
2211             set_iconv(TRUE, result->iconv_func);
2212         }else if (c <= DEL){
2213             struct input_code *ptr = input_code_list;
2214             while (ptr->name){
2215                 status_reset(ptr);
2216                 ++ptr;
2217             }
2218         }
2219     }
2220 }
2221
2222 #ifndef WIN32DLL
2223 nkf_char std_getc(FILE *f)
2224 {
2225     if (std_gc_ndx){
2226         return std_gc_buf[--std_gc_ndx];
2227     }
2228     return getc(f);
2229 }
2230 #endif /*WIN32DLL*/
2231
2232 nkf_char std_ungetc(nkf_char c, FILE *f)
2233 {
2234     if (std_gc_ndx == STD_GC_BUFSIZE){
2235         return EOF;
2236     }
2237     std_gc_buf[std_gc_ndx++] = c;
2238     return c;
2239 }
2240
2241 #ifndef WIN32DLL
2242 void std_putc(nkf_char c)
2243 {
2244     if(c!=EOF)
2245       putchar(c);
2246 }
2247 #endif /*WIN32DLL*/
2248
2249 #if !defined(PERL_XS) && !defined(WIN32DLL)
2250 nkf_char noconvert(FILE *f)
2251 {
2252     nkf_char    c;
2253
2254     if (nop_f == 2)
2255         module_connection();
2256     while ((c = (*i_getc)(f)) != EOF)
2257       (*o_putc)(c);
2258     (*o_putc)(EOF);
2259     return 1;
2260 }
2261 #endif
2262
2263 void module_connection(void)
2264 {
2265     oconv = output_conv;
2266     o_putc = std_putc;
2267
2268     /* replace continucation module, from output side */
2269
2270     /* output redicrection */
2271 #ifdef CHECK_OPTION
2272     if (noout_f || guess_f){
2273         o_putc = no_putc;
2274     }
2275 #endif
2276     if (mimeout_f) {
2277         o_mputc = o_putc;
2278         o_putc = mime_putc;
2279         if (mimeout_f == TRUE) {
2280             o_base64conv = oconv; oconv = base64_conv;
2281         }
2282         /* base64_count = 0; */
2283     }
2284
2285     if (nlmode_f || guess_f) {
2286         o_nlconv = oconv; oconv = nl_conv;
2287     }
2288     if (rot_f) {
2289         o_rot_conv = oconv; oconv = rot_conv;
2290     }
2291     if (iso2022jp_f) {
2292         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2293     }
2294     if (hira_f) {
2295         o_hira_conv = oconv; oconv = hira_conv;
2296     }
2297     if (fold_f) {
2298         o_fconv = oconv; oconv = fold_conv;
2299         f_line = 0;
2300     }
2301     if (alpha_f || x0201_f) {
2302         o_zconv = oconv; oconv = z_conv;
2303     }
2304
2305     i_getc = std_getc;
2306     i_ungetc = std_ungetc;
2307     /* input redicrection */
2308 #ifdef INPUT_OPTION
2309     if (cap_f){
2310         i_cgetc = i_getc; i_getc = cap_getc;
2311         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2312     }
2313     if (url_f){
2314         i_ugetc = i_getc; i_getc = url_getc;
2315         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2316     }
2317 #endif
2318 #ifdef NUMCHAR_OPTION
2319     if (numchar_f){
2320         i_ngetc = i_getc; i_getc = numchar_getc;
2321         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2322     }
2323 #endif
2324 #ifdef UNICODE_NORMALIZATION
2325     if (nfc_f && input_f == UTF8_INPUT){
2326         i_nfc_getc = i_getc; i_getc = nfc_getc;
2327         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2328     }
2329 #endif
2330     if (mime_f && mimebuf_f==FIXED_MIME) {
2331         i_mgetc = i_getc; i_getc = mime_getc;
2332         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2333     }
2334     if (broken_f & 1) {
2335         i_bgetc = i_getc; i_getc = broken_getc;
2336         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2337     }
2338     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2339         set_iconv(-TRUE, e_iconv);
2340     } else if (input_f == SJIS_INPUT) {
2341         set_iconv(-TRUE, s_iconv);
2342 #ifdef UTF8_INPUT_ENABLE
2343     } else if (input_f == UTF8_INPUT) {
2344         set_iconv(-TRUE, w_iconv);
2345     } else if (input_f == UTF16_INPUT) {
2346         set_iconv(-TRUE, w_iconv16);
2347     } else if (input_f == UTF32_INPUT) {
2348         set_iconv(-TRUE, w_iconv32);
2349 #endif
2350     } else {
2351         set_iconv(FALSE, e_iconv);
2352     }
2353
2354     {
2355         struct input_code *p = input_code_list;
2356         while (p->name){
2357             status_reinit(p++);
2358         }
2359     }
2360 }
2361
2362 /*
2363  * Check and Ignore BOM
2364  */
2365 void check_bom(FILE *f)
2366 {
2367     int c2;
2368     switch(c2 = (*i_getc)(f)){
2369     case 0x00:
2370         if((c2 = (*i_getc)(f)) == 0x00){
2371             if((c2 = (*i_getc)(f)) == 0xFE){
2372                 if((c2 = (*i_getc)(f)) == 0xFF){
2373                     if(!input_f){
2374                         set_iconv(TRUE, w_iconv32);
2375                     }
2376                     if (iconv == w_iconv32) {
2377                         input_endian = ENDIAN_BIG;
2378                         return;
2379                     }
2380                     (*i_ungetc)(0xFF,f);
2381                 }else (*i_ungetc)(c2,f);
2382                 (*i_ungetc)(0xFE,f);
2383             }else if(c2 == 0xFF){
2384                 if((c2 = (*i_getc)(f)) == 0xFE){
2385                     if(!input_f){
2386                         set_iconv(TRUE, w_iconv32);
2387                     }
2388                     if (iconv == w_iconv32) {
2389                         input_endian = ENDIAN_2143;
2390                         return;
2391                     }
2392                     (*i_ungetc)(0xFF,f);
2393                 }else (*i_ungetc)(c2,f);
2394                 (*i_ungetc)(0xFF,f);
2395             }else (*i_ungetc)(c2,f);
2396             (*i_ungetc)(0x00,f);
2397         }else (*i_ungetc)(c2,f);
2398         (*i_ungetc)(0x00,f);
2399         break;
2400     case 0xEF:
2401         if((c2 = (*i_getc)(f)) == 0xBB){
2402             if((c2 = (*i_getc)(f)) == 0xBF){
2403                 if(!input_f){
2404                     set_iconv(TRUE, w_iconv);
2405                 }
2406                 if (iconv == w_iconv) {
2407                     return;
2408                 }
2409                 (*i_ungetc)(0xBF,f);
2410             }else (*i_ungetc)(c2,f);
2411             (*i_ungetc)(0xBB,f);
2412         }else (*i_ungetc)(c2,f);
2413         (*i_ungetc)(0xEF,f);
2414         break;
2415     case 0xFE:
2416         if((c2 = (*i_getc)(f)) == 0xFF){
2417             if((c2 = (*i_getc)(f)) == 0x00){
2418                 if((c2 = (*i_getc)(f)) == 0x00){
2419                     if(!input_f){
2420                         set_iconv(TRUE, w_iconv32);
2421                     }
2422                     if (iconv == w_iconv32) {
2423                         input_endian = ENDIAN_3412;
2424                         return;
2425                     }
2426                     (*i_ungetc)(0x00,f);
2427                 }else (*i_ungetc)(c2,f);
2428                 (*i_ungetc)(0x00,f);
2429             }else (*i_ungetc)(c2,f);
2430             if(!input_f){
2431                 set_iconv(TRUE, w_iconv16);
2432             }
2433             if (iconv == w_iconv16) {
2434                 input_endian = ENDIAN_BIG;
2435                 return;
2436             }
2437             (*i_ungetc)(0xFF,f);
2438         }else (*i_ungetc)(c2,f);
2439         (*i_ungetc)(0xFE,f);
2440         break;
2441     case 0xFF:
2442         if((c2 = (*i_getc)(f)) == 0xFE){
2443             if((c2 = (*i_getc)(f)) == 0x00){
2444                 if((c2 = (*i_getc)(f)) == 0x00){
2445                     if(!input_f){
2446                         set_iconv(TRUE, w_iconv32);
2447                     }
2448                     if (iconv == w_iconv32) {
2449                         input_endian = ENDIAN_LITTLE;
2450                         return;
2451                     }
2452                     (*i_ungetc)(0x00,f);
2453                 }else (*i_ungetc)(c2,f);
2454                 (*i_ungetc)(0x00,f);
2455             }else (*i_ungetc)(c2,f);
2456             if(!input_f){
2457                 set_iconv(TRUE, w_iconv16);
2458             }
2459             if (iconv == w_iconv16) {
2460                 input_endian = ENDIAN_LITTLE;
2461                 return;
2462             }
2463             (*i_ungetc)(0xFE,f);
2464         }else (*i_ungetc)(c2,f);
2465         (*i_ungetc)(0xFF,f);
2466         break;
2467     default:
2468         (*i_ungetc)(c2,f);
2469         break;
2470     }
2471 }
2472
2473 /*
2474    Conversion main loop. Code detection only.
2475  */
2476
2477 nkf_char kanji_convert(FILE *f)
2478 {
2479     nkf_char    c3, c2=0, c1, c0=0;
2480     int is_8bit = FALSE;
2481
2482     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2483 #ifdef UTF8_INPUT_ENABLE
2484        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2485 #endif
2486       ){
2487         is_8bit = TRUE;
2488     }
2489
2490     input_mode = ASCII;
2491     output_mode = ASCII;
2492     shift_mode = FALSE;
2493
2494 #define NEXT continue      /* no output, get next */
2495 #define SEND ;             /* output c1 and c2, get next */
2496 #define LAST break         /* end of loop, go closing  */
2497
2498     module_connection();
2499     check_bom(f);
2500
2501     while ((c1 = (*i_getc)(f)) != EOF) {
2502 #ifdef INPUT_CODE_FIX
2503         if (!input_f)
2504 #endif
2505             code_status(c1);
2506         if (c2) {
2507             /* second byte */
2508             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2509                 /* in case of 8th bit is on */
2510                 if (!estab_f&&!mime_decode_mode) {
2511                     /* in case of not established yet */
2512                     /* It is still ambiguious */
2513                     if (h_conv(f, c2, c1)==EOF)
2514                         LAST;
2515                     else
2516                         c2 = 0;
2517                     NEXT;
2518                 } else {
2519                     /* in case of already established */
2520                     if (c1 < AT) {
2521                         /* ignore bogus code and not CP5022x UCD */
2522                         c2 = 0;
2523                         NEXT;
2524                     } else {
2525                         SEND;
2526                     }
2527                 }
2528             } else
2529                 /* second byte, 7 bit code */
2530                 /* it might be kanji shitfted */
2531                 if ((c1 == DEL) || (c1 <= SP)) {
2532                     /* ignore bogus first code */
2533                     c2 = 0;
2534                     NEXT;
2535                 } else
2536                     SEND;
2537         } else {
2538             /* first byte */
2539 #ifdef UTF8_INPUT_ENABLE
2540             if (iconv == w_iconv16) {
2541                 if (input_endian == ENDIAN_BIG) {
2542                     c2 = c1;
2543                     if ((c1 = (*i_getc)(f)) != EOF) {
2544                         if (0xD8 <= c2 && c2 <= 0xDB) {
2545                             if ((c0 = (*i_getc)(f)) != EOF) {
2546                                 c0 <<= 8;
2547                                 if ((c3 = (*i_getc)(f)) != EOF) {
2548                                     c0 |= c3;
2549                                 } else c2 = EOF;
2550                             } else c2 = EOF;
2551                         }
2552                     } else c2 = EOF;
2553                 } else {
2554                     if ((c2 = (*i_getc)(f)) != EOF) {
2555                         if (0xD8 <= c2 && c2 <= 0xDB) {
2556                             if ((c3 = (*i_getc)(f)) != EOF) {
2557                                 if ((c0 = (*i_getc)(f)) != EOF) {
2558                                     c0 <<= 8;
2559                                     c0 |= c3;
2560                                 } else c2 = EOF;
2561                             } else c2 = EOF;
2562                         }
2563                     } else c2 = EOF;
2564                 }
2565                 SEND;
2566             } else if(iconv == w_iconv32){
2567                 int c3 = c1;
2568                 if((c2 = (*i_getc)(f)) != EOF &&
2569                    (c1 = (*i_getc)(f)) != EOF &&
2570                    (c0 = (*i_getc)(f)) != EOF){
2571                     switch(input_endian){
2572                     case ENDIAN_BIG:
2573                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2574                         break;
2575                     case ENDIAN_LITTLE:
2576                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2577                         break;
2578                     case ENDIAN_2143:
2579                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2580                         break;
2581                     case ENDIAN_3412:
2582                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2583                         break;
2584                     }
2585                     c2 = 0;
2586                 }else{
2587                     c2 = EOF;
2588                 }
2589                 SEND;
2590             } else
2591 #endif
2592 #ifdef NUMCHAR_OPTION
2593             if (is_unicode_capsule(c1)){
2594                 SEND;
2595             } else
2596 #endif
2597             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2598                 /* 8 bit code */
2599                 if (!estab_f && !iso8859_f) {
2600                     /* not established yet */
2601                     c2 = c1;
2602                     NEXT;
2603                 } else { /* estab_f==TRUE */
2604                     if (iso8859_f) {
2605                         c2 = ISO8859_1;
2606                         c1 &= 0x7f;
2607                         SEND;
2608                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2609                         /* SJIS X0201 Case... */
2610                         if(iso2022jp_f && x0201_f==NO_X0201) {
2611                             (*oconv)(GETA1, GETA2);
2612                             NEXT;
2613                         } else {
2614                             c2 = X0201;
2615                             c1 &= 0x7f;
2616                             SEND;
2617                         }
2618                     } else if (c1==SSO && iconv != s_iconv) {
2619                         /* EUC X0201 Case */
2620                         c1 = (*i_getc)(f);  /* skip SSO */
2621                         code_status(c1);
2622                         if (SSP<=c1 && c1<0xe0) {
2623                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2624                                 (*oconv)(GETA1, GETA2);
2625                                 NEXT;
2626                             } else {
2627                                 c2 = X0201;
2628                                 c1 &= 0x7f;
2629                                 SEND;
2630                             }
2631                         } else  { /* bogus code, skip SSO and one byte */
2632                             NEXT;
2633                         }
2634                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2635                                (c1 == 0xFD || c1 == 0xFE)) {
2636                         /* CP10001 */
2637                         c2 = X0201;
2638                         c1 &= 0x7f;
2639                         SEND;
2640                     } else {
2641                        /* already established */
2642                        c2 = c1;
2643                        NEXT;
2644                     }
2645                 }
2646             } else if ((c1 > SP) && (c1 != DEL)) {
2647                 /* in case of Roman characters */
2648                 if (shift_mode) {
2649                     /* output 1 shifted byte */
2650                     if (iso8859_f) {
2651                         c2 = ISO8859_1;
2652                         SEND;
2653                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2654                       /* output 1 shifted byte */
2655                         if(iso2022jp_f && x0201_f==NO_X0201) {
2656                             (*oconv)(GETA1, GETA2);
2657                             NEXT;
2658                         } else {
2659                             c2 = X0201;
2660                             SEND;
2661                         }
2662                     } else {
2663                         /* look like bogus code */
2664                         NEXT;
2665                     }
2666                 } else if (input_mode == X0208 || input_mode == X0212 ||
2667                            input_mode == X0213_1 || input_mode == X0213_2) {
2668                     /* in case of Kanji shifted */
2669                     c2 = c1;
2670                     NEXT;
2671                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2672                     /* Check MIME code */
2673                     if ((c1 = (*i_getc)(f)) == EOF) {
2674                         (*oconv)(0, '=');
2675                         LAST;
2676                     } else if (c1 == '?') {
2677                         /* =? is mime conversion start sequence */
2678                         if(mime_f == STRICT_MIME) {
2679                             /* check in real detail */
2680                             if (mime_begin_strict(f) == EOF)
2681                                 LAST;
2682                             else
2683                                 NEXT;
2684                         } else if (mime_begin(f) == EOF)
2685                             LAST;
2686                         else
2687                             NEXT;
2688                     } else {
2689                         (*oconv)(0, '=');
2690                         (*i_ungetc)(c1,f);
2691                         NEXT;
2692                     }
2693                 } else {
2694                     /* normal ASCII code */
2695                     SEND;
2696                 }
2697             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2698                 shift_mode = FALSE;
2699                 NEXT;
2700             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2701                 shift_mode = TRUE;
2702                 NEXT;
2703             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2704                 if ((c1 = (*i_getc)(f)) == EOF) {
2705                     /*  (*oconv)(0, ESC); don't send bogus code */
2706                     LAST;
2707                 } else if (c1 == '$') {
2708                     if ((c1 = (*i_getc)(f)) == EOF) {
2709                         /*
2710                         (*oconv)(0, ESC); don't send bogus code
2711                         (*oconv)(0, '$'); */
2712                         LAST;
2713                     } else if (c1 == '@'|| c1 == 'B') {
2714                         /* This is kanji introduction */
2715                         input_mode = X0208;
2716                         shift_mode = FALSE;
2717                         set_input_codename("ISO-2022-JP");
2718 #ifdef CHECK_OPTION
2719                         debug("ISO-2022-JP");
2720 #endif
2721                         NEXT;
2722                     } else if (c1 == '(') {
2723                         if ((c1 = (*i_getc)(f)) == EOF) {
2724                             /* don't send bogus code
2725                             (*oconv)(0, ESC);
2726                             (*oconv)(0, '$');
2727                             (*oconv)(0, '(');
2728                                 */
2729                             LAST;
2730                         } else if (c1 == '@'|| c1 == 'B') {
2731                             /* This is kanji introduction */
2732                             input_mode = X0208;
2733                             shift_mode = FALSE;
2734                             NEXT;
2735 #ifdef X0212_ENABLE
2736                         } else if (c1 == 'D'){
2737                             input_mode = X0212;
2738                             shift_mode = FALSE;
2739                             NEXT;
2740 #endif /* X0212_ENABLE */
2741                         } else if (c1 == (X0213_1&0x7F)){
2742                             input_mode = X0213_1;
2743                             shift_mode = FALSE;
2744                             NEXT;
2745                         } else if (c1 == (X0213_2&0x7F)){
2746                             input_mode = X0213_2;
2747                             shift_mode = FALSE;
2748                             NEXT;
2749                         } else {
2750                             /* could be some special code */
2751                             (*oconv)(0, ESC);
2752                             (*oconv)(0, '$');
2753                             (*oconv)(0, '(');
2754                             (*oconv)(0, c1);
2755                             NEXT;
2756                         }
2757                     } else if (broken_f&0x2) {
2758                         /* accept any ESC-(-x as broken code ... */
2759                         input_mode = X0208;
2760                         shift_mode = FALSE;
2761                         NEXT;
2762                     } else {
2763                         (*oconv)(0, ESC);
2764                         (*oconv)(0, '$');
2765                         (*oconv)(0, c1);
2766                         NEXT;
2767                     }
2768                 } else if (c1 == '(') {
2769                     if ((c1 = (*i_getc)(f)) == EOF) {
2770                         /* don't send bogus code
2771                         (*oconv)(0, ESC);
2772                         (*oconv)(0, '('); */
2773                         LAST;
2774                     } else {
2775                         if (c1 == 'I') {
2776                             /* This is X0201 kana introduction */
2777                             input_mode = X0201; shift_mode = X0201;
2778                             NEXT;
2779                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2780                             /* This is X0208 kanji introduction */
2781                             input_mode = ASCII; shift_mode = FALSE;
2782                             NEXT;
2783                         } else if (broken_f&0x2) {
2784                             input_mode = ASCII; shift_mode = FALSE;
2785                             NEXT;
2786                         } else {
2787                             (*oconv)(0, ESC);
2788                             (*oconv)(0, '(');
2789                             /* maintain various input_mode here */
2790                             SEND;
2791                         }
2792                     }
2793                } else if ( c1 == 'N' || c1 == 'n'){
2794                    /* SS2 */
2795                    c3 = (*i_getc)(f);  /* skip SS2 */
2796                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2797                        c1 = c3;
2798                        c2 = X0201;
2799                        SEND;
2800                    }else{
2801                        (*i_ungetc)(c3, f);
2802                        /* lonely ESC  */
2803                        (*oconv)(0, ESC);
2804                        SEND;
2805                    }
2806                 } else {
2807                     /* lonely ESC  */
2808                     (*oconv)(0, ESC);
2809                     SEND;
2810                 }
2811             } else if (c1 == ESC && iconv == s_iconv) {
2812                 /* ESC in Shift_JIS */
2813                 if ((c1 = (*i_getc)(f)) == EOF) {
2814                     /*  (*oconv)(0, ESC); don't send bogus code */
2815                     LAST;
2816                 } else if (c1 == '$') {
2817                     /* J-PHONE emoji */
2818                     if ((c1 = (*i_getc)(f)) == EOF) {
2819                         /*
2820                            (*oconv)(0, ESC); don't send bogus code
2821                            (*oconv)(0, '$'); */
2822                         LAST;
2823                     } else {
2824                         if (('E' <= c1 && c1 <= 'G') ||
2825                             ('O' <= c1 && c1 <= 'Q')) {
2826                             /*
2827                                NUM : 0 1 2 3 4 5
2828                                BYTE: G E F O P Q
2829                                C%7 : 1 6 0 2 3 4
2830                                C%7 : 0 1 2 3 4 5 6
2831                                NUM : 2 0 3 4 5 X 1
2832                              */
2833                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2834                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2835                             while ((c1 = (*i_getc)(f)) != EOF) {
2836                                 if (SP <= c1 && c1 <= 'z') {
2837                                     (*oconv)(0, c1 + c0);
2838                                 } else break; /* c1 == SO */
2839                             }
2840                         }
2841                     }
2842                     if (c1 == EOF) LAST;
2843                     NEXT;
2844                 } else {
2845                     /* lonely ESC  */
2846                     (*oconv)(0, ESC);
2847                     SEND;
2848                 }
2849             } else if (c1 == LF || c1 == CR) {
2850                 if (broken_f&4) {
2851                     input_mode = ASCII; set_iconv(FALSE, 0);
2852                     SEND;
2853                 } else if (mime_decode_f && !mime_decode_mode){
2854                     if (c1 == LF) {
2855                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2856                             i_ungetc(SP,f);
2857                             continue;
2858                         } else {
2859                             i_ungetc(c1,f);
2860                         }
2861                         c1 = LF;
2862                         SEND;
2863                     } else  { /* if (c1 == CR)*/
2864                         if ((c1=(*i_getc)(f))!=EOF) {
2865                             if (c1==SP) {
2866                                 i_ungetc(SP,f);
2867                                 continue;
2868                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2869                                 i_ungetc(SP,f);
2870                                 continue;
2871                             } else {
2872                                 i_ungetc(c1,f);
2873                             }
2874                             i_ungetc(LF,f);
2875                         } else {
2876                             i_ungetc(c1,f);
2877                         }
2878                         c1 = CR;
2879                         SEND;
2880                     }
2881                 }
2882             } else if (c1 == DEL && input_mode == X0208) {
2883                 /* CP5022x */
2884                 c2 = c1;
2885                 NEXT;
2886             } else
2887                 SEND;
2888         }
2889         /* send: */
2890         switch(input_mode){
2891         case ASCII:
2892             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2893             case -2:
2894                 /* 4 bytes UTF-8 */
2895                 if ((c0 = (*i_getc)(f)) != EOF) {
2896                     code_status(c0);
2897                     c0 <<= 8;
2898                     if ((c3 = (*i_getc)(f)) != EOF) {
2899                         code_status(c3);
2900                         (*iconv)(c2, c1, c0|c3);
2901                     }
2902                 }
2903                 break;
2904             case -1:
2905                 /* 3 bytes EUC or UTF-8 */
2906                 if ((c0 = (*i_getc)(f)) != EOF) {
2907                     code_status(c0);
2908                     (*iconv)(c2, c1, c0);
2909                 }
2910                 break;
2911             }
2912             break;
2913         case X0208:
2914         case X0213_1:
2915             if (ms_ucs_map_f &&
2916                 0x7F <= c2 && c2 <= 0x92 &&
2917                 0x21 <= c1 && c1 <= 0x7E) {
2918                 /* CP932 UDC */
2919                 if(c1 == 0x7F) return 0;
2920                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2921                 c2 = 0;
2922             }
2923             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2924             break;
2925 #ifdef X0212_ENABLE
2926         case X0212:
2927             (*oconv)(PREFIX_EUCG3 | c2, c1);
2928             break;
2929 #endif /* X0212_ENABLE */
2930         case X0213_2:
2931             (*oconv)(PREFIX_EUCG3 | c2, c1);
2932             break;
2933         default:
2934             (*oconv)(input_mode, c1);  /* other special case */
2935         }
2936
2937         c2 = 0;
2938         c0 = 0;
2939         continue;
2940         /* goto next_word */
2941     }
2942
2943     /* epilogue */
2944     (*iconv)(EOF, 0, 0);
2945     if (!input_codename)
2946     {
2947         if (is_8bit) {
2948             struct input_code *p = input_code_list;
2949             struct input_code *result = p;
2950             while (p->name){
2951                 if (p->score < result->score) result = p;
2952                 ++p;
2953             }
2954             set_input_codename(result->name);
2955 #ifdef CHECK_OPTION
2956             debug(result->name);
2957 #endif
2958         }
2959     }
2960     return 1;
2961 }
2962
2963 nkf_char
2964 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2965 {
2966     nkf_char ret, c3, c0;
2967     int hold_index;
2968
2969
2970     /** it must NOT be in the kanji shifte sequence      */
2971     /** it must NOT be written in JIS7                   */
2972     /** and it must be after 2 byte 8bit code            */
2973
2974     hold_count = 0;
2975     push_hold_buf(c2);
2976     push_hold_buf(c1);
2977
2978     while ((c1 = (*i_getc)(f)) != EOF) {
2979         if (c1 == ESC){
2980             (*i_ungetc)(c1,f);
2981             break;
2982         }
2983         code_status(c1);
2984         if (push_hold_buf(c1) == EOF || estab_f){
2985             break;
2986         }
2987     }
2988
2989     if (!estab_f){
2990         struct input_code *p = input_code_list;
2991         struct input_code *result = p;
2992         if (c1 == EOF){
2993             code_status(c1);
2994         }
2995         while (p->name){
2996             if (p->status_func && p->score < result->score){
2997                 result = p;
2998             }
2999             ++p;
3000         }
3001         set_iconv(TRUE, result->iconv_func);
3002     }
3003
3004
3005     /** now,
3006      ** 1) EOF is detected, or
3007      ** 2) Code is established, or
3008      ** 3) Buffer is FULL (but last word is pushed)
3009      **
3010      ** in 1) and 3) cases, we continue to use
3011      ** Kanji codes by oconv and leave estab_f unchanged.
3012      **/
3013
3014     ret = c1;
3015     hold_index = 0;
3016     while (hold_index < hold_count){
3017         c2 = hold_buf[hold_index++];
3018         if (c2 <= DEL
3019 #ifdef NUMCHAR_OPTION
3020             || is_unicode_capsule(c2)
3021 #endif
3022             ){
3023             (*iconv)(0, c2, 0);
3024             continue;
3025         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3026             (*iconv)(X0201, c2, 0);
3027             continue;
3028         }
3029         if (hold_index < hold_count){
3030             c1 = hold_buf[hold_index++];
3031         }else{
3032             c1 = (*i_getc)(f);
3033             if (c1 == EOF){
3034                 c3 = EOF;
3035                 break;
3036             }
3037             code_status(c1);
3038         }
3039         c0 = 0;
3040         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3041         case -2:
3042             /* 4 bytes UTF-8 */
3043             if (hold_index < hold_count){
3044                 c0 = hold_buf[hold_index++];
3045             } else if ((c0 = (*i_getc)(f)) == EOF) {
3046                 ret = EOF;
3047                 break;
3048             } else {
3049                 code_status(c0);
3050                 c0 <<= 8;
3051                 if (hold_index < hold_count){
3052                     c3 = hold_buf[hold_index++];
3053                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3054                     c0 = ret = EOF;
3055                     break;
3056                 } else {
3057                     code_status(c3);
3058                     (*iconv)(c2, c1, c0|c3);
3059                 }
3060             }
3061             break;
3062         case -1:
3063             /* 3 bytes EUC or UTF-8 */
3064             if (hold_index < hold_count){
3065                 c0 = hold_buf[hold_index++];
3066             } else if ((c0 = (*i_getc)(f)) == EOF) {
3067                 ret = EOF;
3068                 break;
3069             } else {
3070                 code_status(c0);
3071             }
3072             (*iconv)(c2, c1, c0);
3073             break;
3074         }
3075         if (c0 == EOF) break;
3076     }
3077     return ret;
3078 }
3079
3080 nkf_char push_hold_buf(nkf_char c2)
3081 {
3082     if (hold_count >= HOLD_SIZE*2)
3083         return (EOF);
3084     hold_buf[hold_count++] = (unsigned char)c2;
3085     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3086 }
3087
3088 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3089 {
3090 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3091     nkf_char val;
3092 #endif
3093     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3094 #ifdef SHIFTJIS_CP932
3095     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3096         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3097         if (val){
3098             c2 = val >> 8;
3099             c1 = val & 0xff;
3100         }
3101     }
3102     if (cp932inv_f
3103         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3104         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3105         if (c){
3106             c2 = c >> 8;
3107             c1 = c & 0xff;
3108         }
3109     }
3110 #endif /* SHIFTJIS_CP932 */
3111 #ifdef X0212_ENABLE
3112     if (!x0213_f && is_ibmext_in_sjis(c2)){
3113         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3114         if (val){
3115             if (val > 0x7FFF){
3116                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3117                 c1 = val & 0xff;
3118             }else{
3119                 c2 = val >> 8;
3120                 c1 = val & 0xff;
3121             }
3122             if (p2) *p2 = c2;
3123             if (p1) *p1 = c1;
3124             return 0;
3125         }
3126     }
3127 #endif
3128     if(c2 >= 0x80){
3129         if(x0213_f && c2 >= 0xF0){
3130             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3131                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3132             }else{ /* 78<=k<=94 */
3133                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3134                 if (0x9E < c1) c2++;
3135             }
3136         }else{
3137             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3138             if (0x9E < c1) c2++;
3139         }
3140         if (c1 < 0x9F)
3141             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3142         else {
3143             c1 = c1 - 0x7E;
3144         }
3145     }
3146
3147 #ifdef X0212_ENABLE
3148     c2 = x0212_unshift(c2);
3149 #endif
3150     if (p2) *p2 = c2;
3151     if (p1) *p1 = c1;
3152     return 0;
3153 }
3154
3155 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3156 {
3157     if (c2 == X0201) {
3158         c1 &= 0x7f;
3159     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3160         /* NOP */
3161     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3162         /* CP932 UDC */
3163         if(c1 == 0x7F) return 0;
3164         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3165         c2 = 0;
3166     } else {
3167         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3168         if (ret) return ret;
3169     }
3170     (*oconv)(c2, c1);
3171     return 0;
3172 }
3173
3174 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3175 {
3176     if (c2 == X0201) {
3177         c1 &= 0x7f;
3178 #ifdef X0212_ENABLE
3179     }else if (c2 == 0x8f){
3180         if (c0 == 0){
3181             return -1;
3182         }
3183         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3184             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3185             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3186             c2 = 0;
3187         } else {
3188             c2 = (c2 << 8) | (c1 & 0x7f);
3189             c1 = c0 & 0x7f;
3190 #ifdef SHIFTJIS_CP932
3191             if (cp51932_f){
3192                 nkf_char s2, s1;
3193                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3194                     s2e_conv(s2, s1, &c2, &c1);
3195                     if (c2 < 0x100){
3196                         c1 &= 0x7f;
3197                         c2 &= 0x7f;
3198                     }
3199                 }
3200             }
3201 #endif /* SHIFTJIS_CP932 */
3202         }
3203 #endif /* X0212_ENABLE */
3204     } else if (c2 == SSO){
3205         c2 = X0201;
3206         c1 &= 0x7f;
3207     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3208         /* NOP */
3209     } else {
3210         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3211             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3212             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3213             c2 = 0;
3214         } else {
3215             c1 &= 0x7f;
3216             c2 &= 0x7f;
3217 #ifdef SHIFTJIS_CP932
3218             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3219                 nkf_char s2, s1;
3220                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3221                     s2e_conv(s2, s1, &c2, &c1);
3222                     if (c2 < 0x100){
3223                         c1 &= 0x7f;
3224                         c2 &= 0x7f;
3225                     }
3226                 }
3227             }
3228 #endif /* SHIFTJIS_CP932 */
3229         }
3230     }
3231     (*oconv)(c2, c1);
3232     return 0;
3233 }
3234
3235 #ifdef UTF8_INPUT_ENABLE
3236 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3237 {
3238     nkf_char ret = 0;
3239
3240     if (!c1){
3241         *p2 = 0;
3242         *p1 = c2;
3243     }else if (0xc0 <= c2 && c2 <= 0xef) {
3244         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3245 #ifdef NUMCHAR_OPTION
3246         if (ret > 0){
3247             if (p2) *p2 = 0;
3248             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3249             ret = 0;
3250         }
3251 #endif
3252     }
3253     return ret;
3254 }
3255
3256 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3257 {
3258     nkf_char ret = 0;
3259     static const char w_iconv_utf8_1st_byte[] =
3260     { /* 0xC0 - 0xFF */
3261         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3262         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3263         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3264         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3265
3266     if (c2 < 0 || 0xff < c2) {
3267     }else if (c2 == 0) { /* 0 : 1 byte*/
3268         c0 = 0;
3269     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3270         return 0;
3271     } else{
3272         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3273         case 21:
3274             if (c1 < 0x80 || 0xBF < c1) return 0;
3275             break;
3276         case 30:
3277             if (c0 == 0) return -1;
3278             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3279                 return 0;
3280             break;
3281         case 31:
3282         case 33:
3283             if (c0 == 0) return -1;
3284             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3285                 return 0;
3286             break;
3287         case 32:
3288             if (c0 == 0) return -1;
3289             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3290                 return 0;
3291             break;
3292         case 40:
3293             if (c0 == 0) return -2;
3294             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3295                 return 0;
3296             break;
3297         case 41:
3298             if (c0 == 0) return -2;
3299             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3300                 return 0;
3301             break;
3302         case 42:
3303             if (c0 == 0) return -2;
3304             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3305                 return 0;
3306             break;
3307         default:
3308             return 0;
3309             break;
3310         }
3311     }
3312     if (c2 == 0 || c2 == EOF){
3313     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3314         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3315         c2 = 0;
3316     } else {
3317         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3318     }
3319     if (ret == 0){
3320         (*oconv)(c2, c1);
3321     }
3322     return ret;
3323 }
3324 #endif
3325
3326 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3327 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3328 {
3329     val &= VALUE_MASK;
3330     if (val < 0x80){
3331         *p2 = val;
3332         *p1 = 0;
3333         *p0 = 0;
3334     }else if (val < 0x800){
3335         *p2 = 0xc0 | (val >> 6);
3336         *p1 = 0x80 | (val & 0x3f);
3337         *p0 = 0;
3338     } else if (val <= NKF_INT32_C(0xFFFF)) {
3339         *p2 = 0xe0 | (val >> 12);
3340         *p1 = 0x80 | ((val >> 6) & 0x3f);
3341         *p0 = 0x80 | (val        & 0x3f);
3342     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3343         *p2 = 0xe0 |  (val >> 16);
3344         *p1 = 0x80 | ((val >> 12) & 0x3f);
3345         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3346     } else {
3347         *p2 = 0;
3348         *p1 = 0;
3349         *p0 = 0;
3350     }
3351 }
3352 #endif
3353
3354 #ifdef UTF8_INPUT_ENABLE
3355 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3356 {
3357     nkf_char val;
3358     if (c2 >= 0xf8) {
3359         val = -1;
3360     } else if (c2 >= 0xf0){
3361         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3362         val = (c2 & 0x0f) << 18;
3363         val |= (c1 & 0x3f) << 12;
3364         val |= (c0 & 0x3f00) >> 2;
3365         val |= (c0 & 0x3f);
3366     }else if (c2 >= 0xe0){
3367         val = (c2 & 0x0f) << 12;
3368         val |= (c1 & 0x3f) << 6;
3369         val |= (c0 & 0x3f);
3370     }else if (c2 >= 0xc0){
3371         val = (c2 & 0x1f) << 6;
3372         val |= (c1 & 0x3f);
3373     }else{
3374         val = c2;
3375     }
3376     return val;
3377 }
3378
3379 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3380 {
3381     nkf_char c2, c1, c0;
3382     nkf_char ret = 0;
3383     val &= VALUE_MASK;
3384     if (val < 0x80){
3385         *p2 = 0;
3386         *p1 = val;
3387     }else{
3388         w16w_conv(val, &c2, &c1, &c0);
3389         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3390 #ifdef NUMCHAR_OPTION
3391         if (ret > 0){
3392             *p2 = 0;
3393             *p1 = CLASS_UNICODE | val;
3394             ret = 0;
3395         }
3396 #endif
3397     }
3398     return ret;
3399 }
3400 #endif
3401
3402 #ifdef UTF8_INPUT_ENABLE
3403 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3404 {
3405     nkf_char ret = 0;
3406     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3407         (*oconv)(c2, c1);
3408         return 0;
3409     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3410         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3411             return -2;
3412         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3413         c2 = 0;
3414     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3415         /*
3416            return 2;
3417         */
3418         return 1;
3419     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3420     if (ret) return ret;
3421     (*oconv)(c2, c1);
3422     return 0;
3423 }
3424
3425 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3426 {
3427     int ret = 0;
3428
3429     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3430     } else if (is_unicode_bmp(c1)) {
3431         ret = w16e_conv(c1, &c2, &c1);
3432     } else {
3433         c2 = 0;
3434         c1 =  CLASS_UNICODE | c1;
3435     }
3436     if (ret) return ret;
3437     (*oconv)(c2, c1);
3438     return 0;
3439 }
3440
3441 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3442 {
3443     const unsigned short *const *pp;
3444     const unsigned short *const *const *ppp;
3445     static const char no_best_fit_chars_table_C2[] =
3446     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3447         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3448         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3449         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3450     static const char no_best_fit_chars_table_C2_ms[] =
3451     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3452         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3453         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3454         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3455     static const char no_best_fit_chars_table_932_C2[] =
3456     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3457         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3458         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3459         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3460     static const char no_best_fit_chars_table_932_C3[] =
3461     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3462         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3463         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3464         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3465     nkf_char ret = 0;
3466
3467     if(c2 < 0x80){
3468         *p2 = 0;
3469         *p1 = c2;
3470     }else if(c2 < 0xe0){
3471         if(no_best_fit_chars_f){
3472             if(ms_ucs_map_f == UCS_MAP_CP932){
3473                 switch(c2){
3474                 case 0xC2:
3475                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3476                     break;
3477                 case 0xC3:
3478                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3479                     break;
3480                 }
3481             }else if(!cp932inv_f){
3482                 switch(c2){
3483                 case 0xC2:
3484                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3485                     break;
3486                 case 0xC3:
3487                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3488                     break;
3489                 }
3490             }else if(ms_ucs_map_f == UCS_MAP_MS){
3491                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3492             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3493                 switch(c2){
3494                 case 0xC2:
3495                     switch(c1){
3496                     case 0xA2:
3497                     case 0xA3:
3498                     case 0xA5:
3499                     case 0xA6:
3500                     case 0xAC:
3501                     case 0xAF:
3502                     case 0xB8:
3503                         return 1;
3504                     }
3505                     break;
3506                 }
3507             }
3508         }
3509         pp =
3510             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3511             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3512             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3513             utf8_to_euc_2bytes;
3514         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3515     }else if(c0 < 0xF0){
3516         if(no_best_fit_chars_f){
3517             if(ms_ucs_map_f == UCS_MAP_CP932){
3518                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3519             }else if(ms_ucs_map_f == UCS_MAP_MS){
3520                 switch(c2){
3521                 case 0xE2:
3522                     switch(c1){
3523                     case 0x80:
3524                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3525                         break;
3526                     case 0x88:
3527                         if(c0 == 0x92) return 1;
3528                         break;
3529                     }
3530                     break;
3531                 case 0xE3:
3532                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3533                     break;
3534                 }
3535             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3536                 switch(c2){
3537                 case 0xE3:
3538                     switch(c1){
3539                     case 0x82:
3540                             if(c0 == 0x94) return 1;
3541                         break;
3542                     case 0x83:
3543                             if(c0 == 0xBB) return 1;
3544                         break;
3545                     }