OSDN Git Service

* some code maintenance.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B 
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program 
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.  
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30 ** UTF-8 \e$B%5%]!<%H$K$D$$$F\e(B
31 **    \e$B=>Mh$N\e(B nkf \e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9\e(B
32 **    nkf -e \e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G\e(B UTF-8 \e$B$HH=Dj$5$l$l$P!"\e(B
33 **    \e$B$=$N$^$^\e(B euc-jp \e$B$KJQ49$5$l$^$9\e(B
34 **
35 **    \e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#\e(B
36 **    (\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O\e(B)
37 **
38 **    \e$B2?$+LdBj$r8+$D$1$?$i!"\e(B
39 **        E-Mail: furukawa@tcp-ip.or.jp
40 **    \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.132 2007/09/19 11:51:55 naruse Exp $ */
43 #define NKF_VERSION "2.0.8"
44 #define NKF_RELEASE_DATE "2007-09-12"
45 #include "config.h"
46 #include "utf8tbl.h"
47
48 #define COPY_RIGHT \
49     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
51
52
53 /*
54 **
55 **
56 **
57 ** USAGE:       nkf [flags] [file] 
58 **
59 ** Flags:
60 ** b    Output is buffered             (DEFAULT)
61 ** u    Output is unbuffered
62 **
63 ** t    no operation
64 **
65 ** j    Output code is JIS 7 bit        (DEFAULT SELECT) 
66 ** s    Output code is MS Kanji         (DEFAULT SELECT) 
67 ** e    Output code is AT&T JIS         (DEFAULT SELECT) 
68 ** w    Output code is UTF-8            (DEFAULT SELECT) 
69 ** l    Output code is JIS 7bit and ISO8859-1 Latin-1
70 **
71 ** m    MIME conversion for ISO-2022-JP
72 ** I    Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M    MIME output conversion 
76 **
77 ** r  {de/en}crypt ROT13/47
78 **
79 ** v  display Version
80 **
81 ** T  Text mode output        (for MS-DOS)
82 **
83 ** x    Do not convert X0201 kana into X0208
84 ** Z    Convert X0208 alphabet to ASCII
85 **
86 ** f60  fold option
87 **
88 ** m    MIME decode
89 ** B    try to fix broken JIS, missing Escape
90 ** B[1-9]  broken level
91 **
92 ** O   Output to 'nkf.out' file or last file name
93 ** d   Delete \r in line feed 
94 ** c   Add \r in line feed 
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
97 **
98 **/
99
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
101 #define MSDOS
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
103 #define __WIN32__
104 #endif
105 #endif
106
107 #ifdef PERL_XS
108 #undef OVERWRITE
109 #endif
110
111 #ifndef PERL_XS
112 #include <stdio.h>
113 #endif
114
115 #include <stdlib.h>
116 #include <string.h>
117
118 #if defined(MSDOS) || defined(__OS2__)
119 #include <fcntl.h>
120 #include <io.h>
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
123 #endif
124 #endif
125
126 #ifdef MSDOS
127 #ifdef LSI_C
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
134 #endif
135 #else /* UNIX */
136 #define setbinmode(fp)
137 #endif
138
139 #if defined(__DJGPP__)
140 void  djgpp_setbinmode(FILE *fp)
141 {
142     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
143     int fd, m;
144     fd = fileno(fp);
145     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146     __file_handle_set(fd, m);
147 }
148 #endif
149
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
152 #else /* BSD */
153 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
154 #endif
155
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
158 #define         EASYWIN
159 #ifndef __WIN16__
160 #define __WIN16__
161 #endif
162 #include <windows.h>
163 #endif
164
165 #ifdef OVERWRITE
166 /* added by satoru@isoternet.org */
167 #if defined(__EMX__)
168 #include <sys/types.h>
169 #endif
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
172 #include <unistd.h>
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
175 #else
176 #include <utime.h>
177 #endif
178 #else /* defined(MSDOS) */
179 #ifdef __WIN32__
180 #ifdef __BORLANDC__ /* BCC32 */
181 #include <utime.h>
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
189 #include <utime.h>
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
192 #endif
193 #endif
194 #endif
195
196 #define         FALSE   0
197 #define         TRUE    1
198
199 /* state of output_mode and input_mode  
200
201    c2           0 means ASCII
202                 X0201
203                 ISO8859_1
204                 X0208
205                 EOF      all termination
206    c1           32bit data
207
208  */
209
210 #define         ASCII           0
211 #define         X0208           1
212 #define         X0201           2
213 #define         ISO8859_1       8
214 #define         NO_X0201        3
215 #define         X0212      0x2844
216 #define         X0213_1    0x284F
217 #define         X0213_2    0x2850
218
219 /* Input Assumption */
220
221 #define         JIS_INPUT       4
222 #define         EUC_INPUT      16
223 #define         SJIS_INPUT      5
224 #define         LATIN1_INPUT    6
225 #define         FIXED_MIME      7
226 #define         STRICT_MIME     8
227
228 /* MIME ENCODE */
229
230 #define         ISO2022JP       9
231 #define         JAPANESE_EUC   10
232 #define         SHIFT_JIS      11
233
234 #define         UTF8           12
235 #define         UTF8_INPUT     13
236 #define         UTF16_INPUT    1015
237 #define         UTF32_INPUT    1017
238
239 /* byte order */
240
241 #define         ENDIAN_BIG      1234
242 #define         ENDIAN_LITTLE   4321
243 #define         ENDIAN_2143     2143
244 #define         ENDIAN_3412     3412
245
246 #define         WISH_TRUE      15
247
248 /* ASCII CODE */
249
250 #define         BS      0x08
251 #define         TAB     0x09
252 #define         NL      0x0a
253 #define         CR      0x0d
254 #define         ESC     0x1b
255 #define         SPACE   0x20
256 #define         AT      0x40
257 #define         SSP     0xa0
258 #define         DEL     0x7f
259 #define         SI      0x0f
260 #define         SO      0x0e
261 #define         SSO     0x8e
262 #define         SS3     0x8f
263
/*
 * Locale-independent ASCII character classification and conversion macros.
 *
 * Every use of a macro parameter is parenthesized so that an expression
 * argument (for example `c | 0x20` or `cond ? a : b`) is treated as one
 * value instead of being re-associated by operator precedence.
 *
 * The argument is still evaluated more than once, so do not pass an
 * expression with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit character; 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when bits 8-15 of c2 equal SS3 (0x8f). */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
283
/*
 * Inclusive byte ranges used for the CP932 tables.
 * NOTE(review): judging by the names, BEGIN/END bound the Shift_JIS lead
 * bytes of the IBM extension area and the CP932INV pair bounds the range
 * used for the inverse mapping -- confirm against the table users.
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The parameter is parenthesized so expression arguments (e.g. `c & 0xFF`)
 * compare as a whole; it is evaluated twice, so avoid side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
289
290 #define         HOLD_SIZE       1024
291 #if defined(INT_IS_SHORT)
292 #define         IOBUF_SIZE      2048
293 #else
294 #define         IOBUF_SIZE      16384
295 #endif
296
297 #define         DEFAULT_J       'B'
298 #define         DEFAULT_R       'B'
299
300 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
301 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
302
303 #define         RANGE_NUM_MAX   18
304 #define         GETA1   0x22
305 #define         GETA2   0x2e
306
307
308 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
309 #define sizeof_euc_to_utf8_1byte 94
310 #define sizeof_euc_to_utf8_2bytes 94
311 #define sizeof_utf8_to_euc_C2 64
312 #define sizeof_utf8_to_euc_E5B8 64
313 #define sizeof_utf8_to_euc_2bytes 112
314 #define sizeof_utf8_to_euc_3bytes 16
315 #endif
316
317 /* MIME preprocessor */
318
319 #ifdef EASYWIN /*Easy Win */
320 extern POINT _BufferSize;
321 #endif
322
/*
 * Record describing one candidate input encoding: its label, some
 * per-encoding working values, and the two callbacks bound to it.
 * input_code_list[] initializes these records positionally, so the
 * order of the members must not change.
 */
323 struct input_code{
324     char *name;             /* encoding label, e.g. "EUC-JP" */
325     nkf_char stat;          /* NOTE(review): presumably state kept by status_func -- confirm */
326     nkf_char score;         /* NOTE(review): presumably a detection score (see set_code_score/clr_code_score) -- confirm */
327     nkf_char index;         /* NOTE(review): presumably the fill level of buf -- confirm */
328     nkf_char buf[3];        /* small per-encoding buffer of three nkf_char values */
329     void (*status_func)(struct input_code *, nkf_char);  /* per-character status callback; NULL for some entries */
330     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);  /* input converter bound to this encoding */
331     int _file_stat;         /* NOTE(review): purpose not visible in this part of the file */
332 };
333
334 static char *input_codename = "";
335
336 #ifndef PERL_XS
337 static const char *CopyRight = COPY_RIGHT;
338 #endif
339 #if !defined(PERL_XS) && !defined(WIN32DLL)
340 static  nkf_char     noconvert(FILE *f);
341 #endif
342 static  void    module_connection(void);
343 static  nkf_char     kanji_convert(FILE *f);
344 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
345 static  nkf_char     push_hold_buf(nkf_char c2);
346 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
347 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
348 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
349 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
350 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
351 /* UCS Mapping
352  * 0: Shift_JIS, eucJP-ascii
353  * 1: eucJP-ms
354  * 2: CP932, CP51932
355  * 3: CP10001
356  */
357 #define UCS_MAP_ASCII   0
358 #define UCS_MAP_MS      1
359 #define UCS_MAP_CP932   2
360 #define UCS_MAP_CP10001 3
361 static int ms_ucs_map_f = UCS_MAP_ASCII;
362 #endif
363 #ifdef UTF8_INPUT_ENABLE
364 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
365 static  int     no_cp932ext_f = FALSE;
366 /* ignore ZERO WIDTH NO-BREAK SPACE */
367 static  int     no_best_fit_chars_f = FALSE;
368 static  int     input_endian = ENDIAN_BIG;
369 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
370 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
371 static  void    encode_fallback_html(nkf_char c);
372 static  void    encode_fallback_xml(nkf_char c);
373 static  void    encode_fallback_java(nkf_char c);
374 static  void    encode_fallback_perl(nkf_char c);
375 static  void    encode_fallback_subchar(nkf_char c);
376 static  void    (*encode_fallback)(nkf_char c) = NULL;
377 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
378 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
379 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
380 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
381 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
382 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
383 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
384 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
385 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
386 static  void    w_status(struct input_code *, nkf_char);
387 #endif
388 #ifdef UTF8_OUTPUT_ENABLE
389 static  int     output_bom_f = FALSE;
390 static  int     output_endian = ENDIAN_BIG;
391 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
392 static  void    w_oconv(nkf_char c2,nkf_char c1);
393 static  void    w_oconv16(nkf_char c2,nkf_char c1);
394 static  void    w_oconv32(nkf_char c2,nkf_char c1);
395 #endif
396 static  void    e_oconv(nkf_char c2,nkf_char c1);
397 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
398 static  void    s_oconv(nkf_char c2,nkf_char c1);
399 static  void    j_oconv(nkf_char c2,nkf_char c1);
400 static  void    fold_conv(nkf_char c2,nkf_char c1);
401 static  void    cr_conv(nkf_char c2,nkf_char c1);
402 static  void    z_conv(nkf_char c2,nkf_char c1);
403 static  void    rot_conv(nkf_char c2,nkf_char c1);
404 static  void    hira_conv(nkf_char c2,nkf_char c1);
405 static  void    base64_conv(nkf_char c2,nkf_char c1);
406 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
407 static  void    no_connection(nkf_char c2,nkf_char c1);
408 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
409
410 static  void    code_score(struct input_code *ptr);
411 static  void    code_status(nkf_char c);
412
413 static  void    std_putc(nkf_char c);
414 static  nkf_char     std_getc(FILE *f);
415 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
416
417 static  nkf_char     broken_getc(FILE *f);
418 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
419
420 static  nkf_char     mime_begin(FILE *f);
421 static  nkf_char     mime_getc(FILE *f);
422 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
423
424 static  void    switch_mime_getc(void);
425 static  void    unswitch_mime_getc(void);
426 static  nkf_char     mime_begin_strict(FILE *f);
427 static  nkf_char     mime_getc_buf(FILE *f);
428 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
429 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
430
431 static  nkf_char     base64decode(nkf_char c);
432 static  void    mime_prechar(nkf_char c2, nkf_char c1);
433 static  void    mime_putc(nkf_char c);
434 static  void    open_mime(nkf_char c);
435 static  void    close_mime(void);
436 static  void    eof_mime(void);
437 static  void    mimeout_addchar(nkf_char c);
438 #ifndef PERL_XS
439 static  void    usage(void);
440 static  void    version(void);
441 #endif
442 static  void    options(unsigned char *c);
443 #if defined(PERL_XS) || defined(WIN32DLL)
444 static  void    reinit(void);
445 #endif
446
447 /* buffers */
448
449 #if !defined(PERL_XS) && !defined(WIN32DLL)
450 static unsigned char   stdibuf[IOBUF_SIZE];
451 static unsigned char   stdobuf[IOBUF_SIZE];
452 #endif
453 static unsigned char   hold_buf[HOLD_SIZE*2];
454 static int             hold_count = 0;
455
456 /* MIME preprocessor fifo */
457
458 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
459 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)   
460 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
461 static unsigned char           mime_buf[MIME_BUF_SIZE];
462 static unsigned int            mime_top = 0;
463 static unsigned int            mime_last = 0;  /* decoded */
464 static unsigned int            mime_input = 0; /* undecoded */
465 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
466
467 /* flags */
468 static int             unbuf_f = FALSE;        /* when set, main() leaves stdout unbuffered */
469 static int             estab_f = FALSE;        /* NOTE(review): presumably "input code established" -- confirm */
470 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
471 static int             binmode_f = TRUE;       /* binary mode */
472 static int             rot_f = FALSE;          /* ROT13/47 mode */
473 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
474 static int             input_f = FALSE;        /* non fixed input code  */
475 static int             alpha_f = FALSE;        /* convert JIS X0208 alphabet to ASCII */
476 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
477 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
478 static int             mimebuf_f = FALSE;      /* MIME buffered input */
479 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
480 static int             iso8859_f = FALSE;      /* ISO8859 through */
481 static int             mimeout_f = FALSE;       /* base64 mode */
482 #if defined(MSDOS) || defined(__OS2__) 
483 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
484 #else
485 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
486 #endif
487 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
488
489 #ifdef UNICODE_NORMALIZATION
490 static int nfc_f = FALSE;
491 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
492 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
493 static nkf_char nfc_getc(FILE *f);
494 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
495 #endif
496
497 #ifdef INPUT_OPTION
498 static int cap_f = FALSE;
499 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
500 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
501 static nkf_char cap_getc(FILE *f);
502 static nkf_char cap_ungetc(nkf_char c,FILE *f);
503
504 static int url_f = FALSE;
505 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
506 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
507 static nkf_char url_getc(FILE *f);
508 static nkf_char url_ungetc(nkf_char c,FILE *f);
509 #endif
510
/*
 * NKF_INT32_C(n): spell the constant n so that it is at least 32 bits wide.
 * When int is only 16 bits (INT_IS_SHORT) an L suffix is appended.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/*
 * A character value is split into a class (top 8 bits, CLASS_MASK) and a
 * payload (low 24 bits, VALUE_MASK).  Class 0x01 (CLASS_UNICODE) marks a
 * value whose payload is a Unicode code point.
 */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The parameter is parenthesized so that arguments such as
 * `CLASS_UNICODE | val` are masked as a whole ('&' binds tighter than '|').
 */
/* True when the class bits of c equal CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
523
524 #ifdef NUMCHAR_OPTION
525 static int numchar_f = FALSE;
526 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
527 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
528 static nkf_char numchar_getc(FILE *f);
529 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
530 #endif
531
532 #ifdef CHECK_OPTION
533 static int noout_f = FALSE;
534 static void no_putc(nkf_char c);
535 static nkf_char debug_f = FALSE;
536 static void debug(const char *str);
537 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
538 #endif
539
540 static int guess_f = FALSE;
541 #if !defined PERL_XS
542 static  void    print_guessed_code(char *filename);
543 #endif
544 static  void    set_input_codename(char *codename);
545 static int is_inputcode_mixed = FALSE;
546 static int is_inputcode_set   = FALSE;
547
548 #ifdef EXEC_IO
549 static int exec_f = 0;
550 #endif
551
552 #ifdef SHIFTJIS_CP932
553 /* invert IBM extended characters to others */
554 static int cp51932_f = FALSE;
555
556 /* invert NEC-selected IBM extended characters to IBM extended characters */
557 static int cp932inv_f = TRUE;
558
559 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
560 #endif /* SHIFTJIS_CP932 */
561
562 #ifdef X0212_ENABLE
563 static int x0212_f = FALSE;
564 static nkf_char x0212_shift(nkf_char c);
565 static nkf_char x0212_unshift(nkf_char c);
566 #endif
567 static int x0213_f = FALSE;
568
569 static unsigned char prefix_table[256];
570
571 static void set_code_score(struct input_code *ptr, nkf_char score);
572 static void clr_code_score(struct input_code *ptr, nkf_char score);
573 static void status_disable(struct input_code *ptr);
574 static void status_push_ch(struct input_code *ptr, nkf_char c);
575 static void status_clear(struct input_code *ptr);
576 static void status_reset(struct input_code *ptr);
577 static void status_reinit(struct input_code *ptr);
578 static void status_check(struct input_code *ptr, nkf_char c);
579 static void e_status(struct input_code *, nkf_char);
580 static void s_status(struct input_code *, nkf_char);
581
/*
 * Table of the input encodings known to this build.
 *
 * Each row is initialized positionally in struct input_code order:
 *   name, stat, score, index, buf[3], status_func, iconv_func, _file_stat
 * The UTF-8/16/32 rows exist only when UTF8_INPUT_ENABLE is defined; the
 * UTF-16 and UTF-32 rows have no status_func (NULL).
 * The final {0} row has a null name and marks the end of the list.
 */
582 struct input_code input_code_list[] = {
583     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
584     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
585 #ifdef UTF8_INPUT_ENABLE
586     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
587     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
588     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
589 #endif
590     {0}
591 };
592
593 static int              mimeout_mode = 0;
594 static int              base64_count = 0;
595
596 /* X0208 -> ASCII converter */
597
598 /* fold parameter */
599 static int             f_line = 0;    /* chars in line */
600 static int             f_prev = 0;
601 static int             fold_preserve_f = FALSE; /* preserve new lines */
602 static int             fold_f  = FALSE;
603 static int             fold_len  = 0;
604
605 /* options */
606 static unsigned char   kanji_intro = DEFAULT_J;
607 static unsigned char   ascii_intro = DEFAULT_R;
608
609 /* Folding */
610
611 #define FOLD_MARGIN  10
612 #define DEFAULT_FOLD 60
613
614 static int             fold_margin  = FOLD_MARGIN;
615
616 /* converters */
617
618 #ifdef DEFAULT_CODE_JIS
619 #   define  DEFAULT_CONV j_oconv
620 #endif
621 #ifdef DEFAULT_CODE_SJIS
622 #   define  DEFAULT_CONV s_oconv
623 #endif
624 #ifdef DEFAULT_CODE_EUC
625 #   define  DEFAULT_CONV e_oconv
626 #endif
627 #ifdef DEFAULT_CODE_UTF8
628 #   define  DEFAULT_CONV w_oconv
629 #endif
630
631 /* process default */
632 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
633
634 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
635 /* s_iconv or oconv */
636 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
637
638 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
639 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
640 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
641 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
642 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
643 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
644 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
645
646 /* static redirections */
647
648 static  void   (*o_putc)(nkf_char c) = std_putc;
649
650 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
651 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
652
653 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
654 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
655
656 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
657
658 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
659 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
660
661 /* for strict mime */
662 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
663 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
664
665 /* Global states */
666 static int output_mode = ASCII,    /* output kanji mode */
667            input_mode =  ASCII,    /* input kanji mode */
668            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
669 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
670
671 /* X0201 / X0208 conversion tables */
672
673 /* X0201 kana conversion table */
674 /* 90-9F A0-DF */
/*
 * Layout: 64 two-byte entries followed by one 0x00,0x00 pair (130 bytes).
 * NOTE(review): presumably each pair holds the two bytes of the X0208
 * code for one X0201 kana; the lookup code is outside this view.  64
 * entries match the A0-DF range alone, so confirm the index base and how
 * the "90-9F" part of the range note applies.
 */
675 static const unsigned char cv[]= {
676     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
677     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
678     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
679     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
680     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
681     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
682     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
683     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
684     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
685     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
686     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
687     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
688     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
689     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
690     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
691     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
692     0x00,0x00};
693
694
695 /* X0201 kana conversion table for dakuten (voiced sound mark) */
696 /* 90-9F A0-DF */
/*
 * Same size and layout as cv[]: 64 two-byte entries plus a 0x00,0x00 pair.
 * NOTE(review): presumably a 0x00,0x00 entry means the kana at that
 * position has no voiced form -- confirm against the lookup code.
 */
697 static const unsigned char dv[]= {
698     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
699     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
700     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
701     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
702     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
703     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
704     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
705     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
706     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
707     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
708     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
709     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
710     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
711     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
714     0x00,0x00};
715
716 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
717 /* 90-9F A0-DF */
/*
 * Same size and layout as cv[] and dv[]: 64 two-byte entries plus a
 * 0x00,0x00 pair.  Only five entries are non-zero.
 * NOTE(review): presumably a 0x00,0x00 entry means the kana at that
 * position has no semi-voiced form -- confirm against the lookup code.
 */
718 static const unsigned char ev[]= {
719     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
720     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
724     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
725     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
730     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
731     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
732     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
733     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
734     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
735     0x00,0x00};
736
737
738 /* X0208 kigou (symbol) conversion table */
739 /* 0x8140 - 0x819e */
/*
 * 96 single-byte entries.  Non-zero values are ASCII codes (0x2c ',',
 * 0x2e '.', 0x3a ':' ...); 0x00 marks a position with no entry.
 * NOTE(review): the range note above spans 95 codes while the table has
 * 96 entries -- confirm the index base used by the lookup code.
 */
740 static const unsigned char fv[] = {
741
742     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
743     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
744     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
745     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
746     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
747     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
748     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
749     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
750     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
751     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
752     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
753     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
754 } ;
755
756
757 #define    CRLF      1
758
759 static int             file_out_f = FALSE;
760 #ifdef OVERWRITE
761 static int             overwrite_f = FALSE;
762 static int             preserve_time_f = FALSE;
763 static int             backup_f = FALSE;
764 static char            *backup_suffix = "";
765 static char *get_backup_filename(const char *suffix, const char *filename);
766 #endif
767
768 static int             crmode_f = 0;   /* CR, NL, CRLF */
769 static nkf_char prev_cr = 0;
770 #ifdef EASYWIN /*Easy Win */
771 static int             end_check;
772 #endif /*Easy Win */
773
774 #define STD_GC_BUFSIZE (256)
775 nkf_char std_gc_buf[STD_GC_BUFSIZE];
776 nkf_char std_gc_ndx;
777
778 #ifdef WIN32DLL
779 #include "nkf32dll.c"
780 #elif defined(PERL_XS)
781 #else /* WIN32DLL */
782 int main(int argc, char **argv)
783 {
784     FILE  *fin;
785     unsigned char  *cp;
786
787     char *outfname = NULL;
788     char *origfname;
789
790 #ifdef EASYWIN /*Easy Win */
791     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
792 #endif
793
794     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
795         cp = (unsigned char *)*argv;
796         options(cp);
797 #ifdef EXEC_IO
798         if (exec_f){
799             int fds[2], pid;
800             if (pipe(fds) < 0 || (pid = fork()) < 0){
801                 abort();
802             }
803             if (pid == 0){
804                 if (exec_f > 0){
805                     close(fds[0]);
806                     dup2(fds[1], 1);
807                 }else{
808                     close(fds[1]);
809                     dup2(fds[0], 0);
810                 }
811                 execvp(argv[1], &argv[1]);
812             }
813             if (exec_f > 0){
814                 close(fds[1]);
815                 dup2(fds[0], 0);
816             }else{
817                 close(fds[0]);
818                 dup2(fds[1], 1);
819             }
820             argc = 0;
821             break;
822         }
823 #endif
824     }
825     if(x0201_f == WISH_TRUE)
826          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
827
828     if (binmode_f == TRUE)
829 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
830     if (freopen("","wb",stdout) == NULL) 
831         return (-1);
832 #else
833     setbinmode(stdout);
834 #endif
835
836     if (unbuf_f)
837       setbuf(stdout, (char *) NULL);
838     else
839       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
840
841     if (argc == 0) {
842       if (binmode_f == TRUE)
843 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
844       if (freopen("","rb",stdin) == NULL) return (-1);
845 #else
846       setbinmode(stdin);
847 #endif
848       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
849       if (nop_f)
850           noconvert(stdin);
851       else {
852           kanji_convert(stdin);
853           if (guess_f) print_guessed_code(NULL);
854       }
855     } else {
856       int nfiles = argc;
857         int is_argument_error = FALSE;
858       while (argc--) {
859             is_inputcode_mixed = FALSE;
860             is_inputcode_set   = FALSE;
861             input_codename = "";
862 #ifdef CHECK_OPTION
863             iconv_for_check = 0;
864 #endif
865           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
866               perror(*--argv);
867                 *argv++;
868                 is_argument_error = TRUE;
869                 continue;
870           } else {
871 #ifdef OVERWRITE
872               int fd = 0;
873               int fd_backup = 0;
874 #endif
875
876 /* reopen file for stdout */
877               if (file_out_f == TRUE) {
878 #ifdef OVERWRITE
879                   if (overwrite_f){
880                       outfname = malloc(strlen(origfname)
881                                         + strlen(".nkftmpXXXXXX")
882                                         + 1);
883                       if (!outfname){
884                           perror(origfname);
885                           return -1;
886                       }
887                       strcpy(outfname, origfname);
888 #ifdef MSDOS
889                       {
890                           int i;
891                           for (i = strlen(outfname); i; --i){
892                               if (outfname[i - 1] == '/'
893                                   || outfname[i - 1] == '\\'){
894                                   break;
895                               }
896                           }
897                           outfname[i] = '\0';
898                       }
899                       strcat(outfname, "ntXXXXXX");
900                       mktemp(outfname);
901                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
902                                 S_IREAD | S_IWRITE);
903 #else
904                       strcat(outfname, ".nkftmpXXXXXX");
905                       fd = mkstemp(outfname);
906 #endif
907                       if (fd < 0
908                           || (fd_backup = dup(fileno(stdout))) < 0
909                           || dup2(fd, fileno(stdout)) < 0
910                           ){
911                           perror(origfname);
912                           return -1;
913                       }
914                   }else
915 #endif
916                   if(argc == 1 ) {
917                       outfname = *argv++;
918                       argc--;
919                   } else {
920                       outfname = "nkf.out";
921                   }
922
923                   if(freopen(outfname, "w", stdout) == NULL) {
924                       perror (outfname);
925                       return (-1);
926                   }
927                   if (binmode_f == TRUE) {
928 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
929                       if (freopen("","wb",stdout) == NULL) 
930                            return (-1);
931 #else
932                       setbinmode(stdout);
933 #endif
934                   }
935               }
936               if (binmode_f == TRUE)
937 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
938                  if (freopen("","rb",fin) == NULL) 
939                     return (-1);
940 #else
941                  setbinmode(fin);
942 #endif 
943               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
944               if (nop_f)
945                   noconvert(fin);
946               else {
947                   char *filename = NULL;
948                   kanji_convert(fin);
949                   if (nfiles > 1) filename = origfname;
950                   if (guess_f) print_guessed_code(filename);
951               }
952               fclose(fin);
953 #ifdef OVERWRITE
954               if (overwrite_f) {
955                   struct stat     sb;
956 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
957                   time_t tb[2];
958 #else
959                   struct utimbuf  tb;
960 #endif
961
962                   fflush(stdout);
963                   close(fd);
964                   if (dup2(fd_backup, fileno(stdout)) < 0){
965                       perror("dup2");
966                   }
967                   if (stat(origfname, &sb)) {
968                       fprintf(stderr, "Can't stat %s\n", origfname);
969                   }
970                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
971                   if (chmod(outfname, sb.st_mode)) {
972                       fprintf(stderr, "Can't set permission %s\n", outfname);
973                   }
974
975                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
976                     if(preserve_time_f){
977 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
978                         tb[0] = tb[1] = sb.st_mtime;
979                         if (utime(outfname, tb)) {
980                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
981                         }
982 #else
983                         tb.actime  = sb.st_atime;
984                         tb.modtime = sb.st_mtime;
985                         if (utime(outfname, &tb)) {
986                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
987                         }
988 #endif
989                     }
990                     if(backup_f){
991                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
992 #ifdef MSDOS
993                         unlink(backup_filename);
994 #endif
995                         if (rename(origfname, backup_filename)) {
996                             perror(backup_filename);
997                             fprintf(stderr, "Can't rename %s to %s\n",
998                                     origfname, backup_filename);
999                         }
1000                     }else{
1001 #ifdef MSDOS
1002                         if (unlink(origfname)){
1003                             perror(origfname);
1004                         }
1005 #endif
1006                     }
1007                   if (rename(outfname, origfname)) {
1008                       perror(origfname);
1009                       fprintf(stderr, "Can't rename %s to %s\n",
1010                               outfname, origfname);
1011                   }
1012                   free(outfname);
1013               }
1014 #endif
1015           }
1016       }
1017         if (is_argument_error)
1018             return(-1);
1019     }
1020 #ifdef EASYWIN /*Easy Win */
1021     if (file_out_f == FALSE) 
1022         scanf("%d",&end_check);
1023     else 
1024         fclose(stdout);
1025 #else /* for Other OS */
1026     if (file_out_f == TRUE) 
1027         fclose(stdout);
1028 #endif /*Easy Win */
1029     return (0);
1030 }
1031 #endif /* WIN32DLL */
1032
1033 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from `suffix`.
 *
 * When `suffix` contains no '*', the result is `filename` followed by
 * `suffix`.  Otherwise the result is `suffix` with every '*' replaced by
 * `filename`; all other characters are copied unchanged.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL
 * (after reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t result_length;
    size_t i, j;

    for(i = 0; i < suffix_length; i++){
        if(suffix[i] == '*') asterisk_count++;
    }

    if(asterisk_count){
        /* every '*' disappears and a copy of filename takes its place */
        result_length = suffix_length - asterisk_count
                        + asterisk_count * filename_length;
    }else{
        result_length = filename_length + suffix_length;
    }

    backup_filename = malloc(result_length + 1);
    if (!backup_filename){
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if(asterisk_count){
        for(i = 0, j = 0; i < suffix_length; i++){
            if(suffix[i] == '*'){
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            }else{
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    }else{
        memcpy(backup_filename, filename, filename_length);
        /* the + 1 also copies suffix's terminating NUL */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1072 #endif
1073
/*
 * Table of long ("--name") options that options() searches in order.
 *
 * An entry with a non-empty alias is expanded: options() continues
 * parsing with the alias string as if those short-option letters had
 * been given.  An entry with an empty alias is handled inside options()
 * by comparing its name.  For a name ending in '=' the text after the
 * '=' on the command line is the option's value.
 *
 * NOTE(review): the "fb-*", "no-cp932ext" and "no-best-fit-chars"
 * entries are guarded by a single UTF8_*_ENABLE macro here, whereas
 * their handlers in options() require both macros -- confirm that the
 * difference is intended.
 */
static const struct {
    const char *name;   /* option text that follows "--" */
    const char *alias;  /* equivalent short options, or "" */
} long_option[] = {
    /* input / output codeset selected by name */
    { "ic=",                "" },
    { "oc=",                "" },

    /* aliases for short options */
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess",              "g" },

    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/* Set to 1 by options() when it sees a bare "--"; from then on options()
 * returns immediately, so later arguments are not parsed as options. */
static int option_mode = 0;
1154
1155 void options(unsigned char *cp)
1156 {
1157     nkf_char i, j;
1158     unsigned char *p;
1159     unsigned char *cp_back = NULL;
1160     char codeset[32];
1161
1162     if (option_mode==1)
1163         return;
1164     while(*cp && *cp++!='-');
1165     while (*cp || cp_back) {
1166         if(!*cp){
1167             cp = cp_back;
1168             cp_back = NULL;
1169             continue;
1170         }
1171         p = 0;
1172         switch (*cp++) {
1173         case '-':  /* literal options */
1174             if (!*cp || *cp == SPACE) {        /* ignore the rest of arguments */
1175                 option_mode = 1;
1176                 return;
1177             }
1178             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1179                 p = (unsigned char *)long_option[i].name;
1180                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1181                 if (*p == cp[j] || cp[j] == ' '){
1182                     p = &cp[j] + 1;
1183                     break;
1184                 }
1185                 p = 0;
1186             }
1187             if (p == 0) return;
1188             while(*cp && *cp != SPACE && cp++);
1189             if (long_option[i].alias[0]){
1190                 cp_back = cp;
1191                 cp = (unsigned char *)long_option[i].alias;
1192             }else{
1193                 if (strcmp(long_option[i].name, "ic=") == 0){
1194                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1195                         codeset[i] = nkf_toupper(p[i]);
1196                     }
1197                     codeset[i] = 0;
1198                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1199                         input_f = JIS_INPUT;
1200                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1201                       strcmp(codeset, "CP50220") == 0 ||
1202                       strcmp(codeset, "CP50221") == 0 ||
1203                       strcmp(codeset, "CP50222") == 0){
1204                         input_f = JIS_INPUT;
1205 #ifdef SHIFTJIS_CP932
1206                         cp51932_f = TRUE;
1207 #endif
1208 #ifdef UTF8_OUTPUT_ENABLE
1209                         ms_ucs_map_f = UCS_MAP_CP932;
1210 #endif
1211                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1212                         input_f = JIS_INPUT;
1213 #ifdef X0212_ENABLE
1214                         x0212_f = TRUE;
1215 #endif
1216                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1217                         input_f = JIS_INPUT;
1218 #ifdef X0212_ENABLE
1219                         x0212_f = TRUE;
1220 #endif
1221                         x0213_f = TRUE;
1222                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1223                         input_f = SJIS_INPUT;
1224                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1225                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1226                              strcmp(codeset, "CP932") == 0 ||
1227                              strcmp(codeset, "MS932") == 0){
1228                         input_f = SJIS_INPUT;
1229 #ifdef SHIFTJIS_CP932
1230                         cp51932_f = TRUE;
1231 #endif
1232 #ifdef UTF8_OUTPUT_ENABLE
1233                         ms_ucs_map_f = UCS_MAP_CP932;
1234 #endif
1235                     }else if(strcmp(codeset, "CP10001") == 0){
1236                         input_f = SJIS_INPUT;
1237 #ifdef SHIFTJIS_CP932
1238                         cp51932_f = TRUE;
1239 #endif
1240 #ifdef UTF8_OUTPUT_ENABLE
1241                         ms_ucs_map_f = UCS_MAP_CP10001;
1242 #endif
1243                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1244                              strcmp(codeset, "EUC-JP") == 0){
1245                         input_f = EUC_INPUT;
1246                     }else if(strcmp(codeset, "CP51932") == 0){
1247                         input_f = EUC_INPUT;
1248 #ifdef SHIFTJIS_CP932
1249                         cp51932_f = TRUE;
1250 #endif
1251 #ifdef UTF8_OUTPUT_ENABLE
1252                         ms_ucs_map_f = UCS_MAP_CP932;
1253 #endif
1254                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1255                              strcmp(codeset, "EUCJP-MS") == 0 ||
1256                              strcmp(codeset, "EUCJPMS") == 0){
1257                         input_f = EUC_INPUT;
1258 #ifdef SHIFTJIS_CP932
1259                         cp51932_f = FALSE;
1260 #endif
1261 #ifdef UTF8_OUTPUT_ENABLE
1262                         ms_ucs_map_f = UCS_MAP_MS;
1263 #endif
1264                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1265                              strcmp(codeset, "EUCJP-ASCII") == 0){
1266                         input_f = EUC_INPUT;
1267 #ifdef SHIFTJIS_CP932
1268                         cp51932_f = FALSE;
1269 #endif
1270 #ifdef UTF8_OUTPUT_ENABLE
1271                         ms_ucs_map_f = UCS_MAP_ASCII;
1272 #endif
1273                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1274                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1275                         input_f = SJIS_INPUT;
1276                         x0213_f = TRUE;
1277 #ifdef SHIFTJIS_CP932
1278                         cp51932_f = FALSE;
1279 #endif
1280                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1281                              strcmp(codeset, "EUC-JIS-2004") == 0){
1282                         input_f = EUC_INPUT;
1283                         x0213_f = TRUE;
1284 #ifdef SHIFTJIS_CP932
1285                         cp51932_f = FALSE;
1286 #endif
1287 #ifdef UTF8_INPUT_ENABLE
1288                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1289                              strcmp(codeset, "UTF-8N") == 0 ||
1290                              strcmp(codeset, "UTF-8-BOM") == 0){
1291                         input_f = UTF8_INPUT;
1292 #ifdef UNICODE_NORMALIZATION
1293                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1294                              strcmp(codeset, "UTF-8-MAC") == 0){
1295                         input_f = UTF8_INPUT;
1296                         nfc_f = TRUE;
1297 #endif
1298                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1299                              strcmp(codeset, "UTF-16BE") == 0 ||
1300                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1301                         input_f = UTF16_INPUT;
1302                         input_endian = ENDIAN_BIG;
1303                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1304                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1305                         input_f = UTF16_INPUT;
1306                         input_endian = ENDIAN_LITTLE;
1307                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1308                              strcmp(codeset, "UTF-32BE") == 0 ||
1309                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1310                         input_f = UTF32_INPUT;
1311                         input_endian = ENDIAN_BIG;
1312                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1313                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1314                         input_f = UTF32_INPUT;
1315                         input_endian = ENDIAN_LITTLE;
1316 #endif
1317                     }
1318                     continue;
1319                 }
1320                 if (strcmp(long_option[i].name, "oc=") == 0){
1321                     x0201_f = FALSE;
1322                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1323                         codeset[i] = nkf_toupper(p[i]);
1324                     }
1325                     codeset[i] = 0;
1326                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1327                         output_conv = j_oconv;
1328                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1329                         output_conv = j_oconv;
1330                         no_cp932ext_f = TRUE;
1331 #ifdef SHIFTJIS_CP932
1332                         cp932inv_f = FALSE;
1333 #endif
1334 #ifdef UTF8_OUTPUT_ENABLE
1335                         ms_ucs_map_f = UCS_MAP_CP932;
1336 #endif
1337                     }else if(strcmp(codeset, "CP50220") == 0){
1338                         output_conv = j_oconv;
1339                         x0201_f = TRUE;
1340 #ifdef SHIFTJIS_CP932
1341                         cp932inv_f = FALSE;
1342 #endif
1343 #ifdef UTF8_OUTPUT_ENABLE
1344                         ms_ucs_map_f = UCS_MAP_CP932;
1345 #endif
1346                     }else if(strcmp(codeset, "CP50221") == 0){
1347                         output_conv = j_oconv;
1348 #ifdef SHIFTJIS_CP932
1349                         cp932inv_f = FALSE;
1350 #endif
1351 #ifdef UTF8_OUTPUT_ENABLE
1352                         ms_ucs_map_f = UCS_MAP_CP932;
1353 #endif
1354                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1355                         output_conv = j_oconv;
1356 #ifdef X0212_ENABLE
1357                         x0212_f = TRUE;
1358 #endif
1359 #ifdef SHIFTJIS_CP932
1360                         cp932inv_f = FALSE;
1361 #endif
1362                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1363                         output_conv = j_oconv;
1364 #ifdef X0212_ENABLE
1365                         x0212_f = TRUE;
1366 #endif
1367                         x0213_f = TRUE;
1368 #ifdef SHIFTJIS_CP932
1369                         cp932inv_f = FALSE;
1370 #endif
1371                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1372                         output_conv = s_oconv;
1373                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1374                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1375                              strcmp(codeset, "CP932") == 0 ||
1376                              strcmp(codeset, "MS932") == 0){
1377                         output_conv = s_oconv;
1378 #ifdef UTF8_OUTPUT_ENABLE
1379                         ms_ucs_map_f = UCS_MAP_CP932;
1380 #endif
1381                     }else if(strcmp(codeset, "CP10001") == 0){
1382                         output_conv = s_oconv;
1383 #ifdef UTF8_OUTPUT_ENABLE
1384                         ms_ucs_map_f = UCS_MAP_CP10001;
1385 #endif
1386                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1387                              strcmp(codeset, "EUC-JP") == 0){
1388                         output_conv = e_oconv;
1389                     }else if(strcmp(codeset, "CP51932") == 0){
1390                         output_conv = e_oconv;
1391 #ifdef SHIFTJIS_CP932
1392                         cp932inv_f = FALSE;
1393 #endif
1394 #ifdef UTF8_OUTPUT_ENABLE
1395                         ms_ucs_map_f = UCS_MAP_CP932;
1396 #endif
1397                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1398                              strcmp(codeset, "EUCJP-MS") == 0 ||
1399                              strcmp(codeset, "EUCJPMS") == 0){
1400                         output_conv = e_oconv;
1401 #ifdef X0212_ENABLE
1402                         x0212_f = TRUE;
1403 #endif
1404 #ifdef UTF8_OUTPUT_ENABLE
1405                         ms_ucs_map_f = UCS_MAP_MS;
1406 #endif
1407                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1408                              strcmp(codeset, "EUCJP-ASCII") == 0){
1409                         output_conv = e_oconv;
1410 #ifdef X0212_ENABLE
1411                         x0212_f = TRUE;
1412 #endif
1413 #ifdef UTF8_OUTPUT_ENABLE
1414                         ms_ucs_map_f = UCS_MAP_ASCII;
1415 #endif
1416                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1417                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1418                         output_conv = s_oconv;
1419                         x0213_f = TRUE;
1420 #ifdef SHIFTJIS_CP932
1421                         cp932inv_f = FALSE;
1422 #endif
1423                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1424                              strcmp(codeset, "EUC-JIS-2004") == 0){
1425                         output_conv = e_oconv;
1426 #ifdef X0212_ENABLE
1427                         x0212_f = TRUE;
1428 #endif
1429                         x0213_f = TRUE;
1430 #ifdef SHIFTJIS_CP932
1431                         cp932inv_f = FALSE;
1432 #endif
1433 #ifdef UTF8_OUTPUT_ENABLE
1434                     }else if(strcmp(codeset, "UTF-8") == 0){
1435                         output_conv = w_oconv;
1436                     }else if(strcmp(codeset, "UTF-8N") == 0){
1437                         output_conv = w_oconv;
1438                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1439                         output_conv = w_oconv;
1440                         output_bom_f = TRUE;
1441                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1442                         output_conv = w_oconv16;
1443                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1444                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1445                         output_conv = w_oconv16;
1446                         output_bom_f = TRUE;
1447                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1448                         output_conv = w_oconv16;
1449                         output_endian = ENDIAN_LITTLE;
1450                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1451                         output_conv = w_oconv16;
1452                         output_endian = ENDIAN_LITTLE;
1453                         output_bom_f = TRUE;
1454                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1455                              strcmp(codeset, "UTF-32BE") == 0){
1456                         output_conv = w_oconv32;
1457                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1458                         output_conv = w_oconv32;
1459                         output_bom_f = TRUE;
1460                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1461                         output_conv = w_oconv32;
1462                         output_endian = ENDIAN_LITTLE;
1463                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1464                         output_conv = w_oconv32;
1465                         output_endian = ENDIAN_LITTLE;
1466                         output_bom_f = TRUE;
1467 #endif
1468                     }
1469                     continue;
1470                 }
1471 #ifdef OVERWRITE
1472                 if (strcmp(long_option[i].name, "overwrite") == 0){
1473                     file_out_f = TRUE;
1474                     overwrite_f = TRUE;
1475                     preserve_time_f = TRUE;
1476                     continue;
1477                 }
1478                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1479                     file_out_f = TRUE;
1480                     overwrite_f = TRUE;
1481                     preserve_time_f = TRUE;
1482                     backup_f = TRUE;
1483                     backup_suffix = malloc(strlen((char *) p) + 1);
1484                     strcpy(backup_suffix, (char *) p);
1485                     continue;
1486                 }
1487                 if (strcmp(long_option[i].name, "in-place") == 0){
1488                     file_out_f = TRUE;
1489                     overwrite_f = TRUE;
1490                     preserve_time_f = FALSE;
1491                     continue;
1492                 }
1493                 if (strcmp(long_option[i].name, "in-place=") == 0){
1494                     file_out_f = TRUE;
1495                     overwrite_f = TRUE;
1496                     preserve_time_f = FALSE;
1497                     backup_f = TRUE;
1498                     backup_suffix = malloc(strlen((char *) p) + 1);
1499                     strcpy(backup_suffix, (char *) p);
1500                     continue;
1501                 }
1502 #endif
1503 #ifdef INPUT_OPTION
1504                 if (strcmp(long_option[i].name, "cap-input") == 0){
1505                     cap_f = TRUE;
1506                     continue;
1507                 }
1508                 if (strcmp(long_option[i].name, "url-input") == 0){
1509                     url_f = TRUE;
1510                     continue;
1511                 }
1512 #endif
1513 #ifdef NUMCHAR_OPTION
1514                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1515                     numchar_f = TRUE;
1516                     continue;
1517                 }
1518 #endif
1519 #ifdef CHECK_OPTION
1520                 if (strcmp(long_option[i].name, "no-output") == 0){
1521                     noout_f = TRUE;
1522                     continue;
1523                 }
1524                 if (strcmp(long_option[i].name, "debug") == 0){
1525                     debug_f = TRUE;
1526                     continue;
1527                 }
1528 #endif
1529                 if (strcmp(long_option[i].name, "cp932") == 0){
1530 #ifdef SHIFTJIS_CP932
1531                     cp51932_f = TRUE;
1532                     cp932inv_f = TRUE;
1533 #endif
1534 #ifdef UTF8_OUTPUT_ENABLE
1535                     ms_ucs_map_f = UCS_MAP_CP932;
1536 #endif
1537                     continue;
1538                 }
1539                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1540 #ifdef SHIFTJIS_CP932
1541                     cp51932_f = FALSE;
1542                     cp932inv_f = FALSE;
1543 #endif
1544 #ifdef UTF8_OUTPUT_ENABLE
1545                     ms_ucs_map_f = UCS_MAP_ASCII;
1546 #endif
1547                     continue;
1548                 }
1549 #ifdef SHIFTJIS_CP932
1550                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1551                     cp932inv_f = TRUE;
1552                     continue;
1553                 }
1554 #endif
1555
1556 #ifdef X0212_ENABLE
1557                 if (strcmp(long_option[i].name, "x0212") == 0){
1558                     x0212_f = TRUE;
1559                     continue;
1560                 }
1561 #endif
1562
1563 #ifdef EXEC_IO
1564                   if (strcmp(long_option[i].name, "exec-in") == 0){
1565                       exec_f = 1;
1566                       return;
1567                   }
1568                   if (strcmp(long_option[i].name, "exec-out") == 0){
1569                       exec_f = -1;
1570                       return;
1571                   }
1572 #endif
1573 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1574                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1575                     no_cp932ext_f = TRUE;
1576                     continue;
1577                 }
1578                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1579                     no_best_fit_chars_f = TRUE;
1580                     continue;
1581                 }
1582                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1583                     encode_fallback = NULL;
1584                     continue;
1585                 }
1586                 if (strcmp(long_option[i].name, "fb-html") == 0){
1587                     encode_fallback = encode_fallback_html;
1588                     continue;
1589                 }
1590                 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1591                     encode_fallback = encode_fallback_xml;
1592                     continue;
1593                 }
1594                 if (strcmp(long_option[i].name, "fb-java") == 0){
1595                     encode_fallback = encode_fallback_java;
1596                     continue;
1597                 }
1598                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1599                     encode_fallback = encode_fallback_perl;
1600                     continue;
1601                 }
1602                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1603                     encode_fallback = encode_fallback_subchar;
1604                     continue;
1605                 }
1606                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1607                     encode_fallback = encode_fallback_subchar;
1608                     unicode_subchar = 0;
1609                     if (p[0] != '0'){
1610                         /* decimal number */
1611                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1612                             unicode_subchar *= 10;
1613                             unicode_subchar += hex2bin(p[i]);
1614                         }
1615                     }else if(p[1] == 'x' || p[1] == 'X'){
1616                         /* hexadecimal number */
1617                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1618                             unicode_subchar <<= 4;
1619                             unicode_subchar |= hex2bin(p[i]);
1620                         }
1621                     }else{
1622                         /* octal number */
1623                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1624                             unicode_subchar *= 8;
1625                             unicode_subchar += hex2bin(p[i]);
1626                         }
1627                     }
1628                     w16e_conv(unicode_subchar, &i, &j);
1629                     unicode_subchar = i<<8 | j;
1630                     continue;
1631                 }
1632 #endif
1633 #ifdef UTF8_OUTPUT_ENABLE
1634                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1635                     ms_ucs_map_f = UCS_MAP_MS;
1636                     continue;
1637                 }
1638 #endif
1639 #ifdef UNICODE_NORMALIZATION
1640                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1641                     input_f = UTF8_INPUT;
1642                     nfc_f = TRUE;
1643                     continue;
1644                 }
1645 #endif
1646                 if (strcmp(long_option[i].name, "prefix=") == 0){
1647                     if (nkf_isgraph(p[0])){
1648                         for (i = 1; nkf_isgraph(p[i]); i++){
1649                             prefix_table[p[i]] = p[0];
1650                         }
1651                     }
1652                     continue;
1653                 }
1654             }
1655             continue;
1656         case 'b':           /* buffered mode */
1657             unbuf_f = FALSE;
1658             continue;
1659         case 'u':           /* non bufferd mode */
1660             unbuf_f = TRUE;
1661             continue;
1662         case 't':           /* transparent mode */
1663             if (*cp=='1') {
1664                 /* alias of -t */
1665                 nop_f = TRUE;
1666                 *cp++;
1667             } else if (*cp=='2') {
1668                 /*
1669                  * -t with put/get
1670                  *
1671                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1672                  *
1673                  */
1674                 nop_f = 2;
1675                 *cp++;
1676             } else
1677                 nop_f = TRUE;
1678             continue;
1679         case 'j':           /* JIS output */
1680         case 'n':
1681             output_conv = j_oconv;
1682             continue;
1683         case 'e':           /* AT&T EUC output */
1684             output_conv = e_oconv;
1685             cp932inv_f = FALSE;
1686             continue;
1687         case 's':           /* SJIS output */
1688             output_conv = s_oconv;
1689             continue;
1690         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1691             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1692             input_f = LATIN1_INPUT;
1693             continue;
1694         case 'i':           /* Kanji IN ESC-$-@/B */
1695             if (*cp=='@'||*cp=='B') 
1696                 kanji_intro = *cp++;
1697             continue;
1698         case 'o':           /* ASCII IN ESC-(-J/B */
1699             if (*cp=='J'||*cp=='B'||*cp=='H') 
1700                 ascii_intro = *cp++;
1701             continue;
1702         case 'h':
1703             /*  
1704                 bit:1   katakana->hiragana
1705                 bit:2   hiragana->katakana
1706             */
1707             if ('9'>= *cp && *cp>='0') 
1708                 hira_f |= (*cp++ -'0');
1709             else 
1710                 hira_f |= 1;
1711             continue;
1712         case 'r':
1713             rot_f = TRUE;
1714             continue;
1715 #if defined(MSDOS) || defined(__OS2__) 
1716         case 'T':
1717             binmode_f = FALSE;
1718             continue;
1719 #endif
1720 #ifndef PERL_XS
1721         case 'V':
1722             version();
1723             exit(1);
1724             break;
1725         case 'v':
1726             usage();
1727             exit(1);
1728             break;
1729 #endif
1730 #ifdef UTF8_OUTPUT_ENABLE
1731         case 'w':           /* UTF-8 output */
1732             if (cp[0] == '8') {
1733                 output_conv = w_oconv; cp++;
1734                 if (cp[0] == '0'){
1735                     cp++;
1736                 } else {
1737                     output_bom_f = TRUE;
1738                 }
1739             } else {
1740                 if ('1'== cp[0] && '6'==cp[1]) {
1741                     output_conv = w_oconv16; cp+=2;
1742                 } else if ('3'== cp[0] && '2'==cp[1]) {
1743                     output_conv = w_oconv32; cp+=2;
1744                 } else {
1745                     output_conv = w_oconv;
1746                     continue;
1747                 }
1748                 if (cp[0]=='L') {
1749                     cp++;
1750                     output_endian = ENDIAN_LITTLE;
1751                 } else if (cp[0] == 'B') {
1752                     cp++;
1753                 } else {
1754                     continue;
1755                 }
1756                 if (cp[0] == '0'){
1757                     cp++;
1758                 } else {
1759                     output_bom_f = TRUE;
1760                 }
1761             }
1762             continue;
1763 #endif
1764 #ifdef UTF8_INPUT_ENABLE
1765         case 'W':           /* UTF input */
1766             if (cp[0] == '8') {
1767                 cp++;
1768                 input_f = UTF8_INPUT;
1769             }else{
1770                 if ('1'== cp[0] && '6'==cp[1]) {
1771                     cp += 2;
1772                     input_f = UTF16_INPUT;
1773                     input_endian = ENDIAN_BIG;
1774                 } else if ('3'== cp[0] && '2'==cp[1]) {
1775                     cp += 2;
1776                     input_f = UTF32_INPUT;
1777                     input_endian = ENDIAN_BIG;
1778                 } else {
1779                     input_f = UTF8_INPUT;
1780                     continue;
1781                 }
1782                 if (cp[0]=='L') {
1783                     cp++;
1784                     input_endian = ENDIAN_LITTLE;
1785                 } else if (cp[0] == 'B') {
1786                     cp++;
1787                 }
1788             }
1789             continue;
1790 #endif
1791         /* Input code assumption */
1792         case 'J':   /* JIS input */
1793             input_f = JIS_INPUT;
1794             continue;
1795         case 'E':   /* AT&T EUC input */
1796             input_f = EUC_INPUT;
1797             continue;
1798         case 'S':   /* MS Kanji input */
1799             input_f = SJIS_INPUT;
1800             if (x0201_f==NO_X0201) x0201_f=TRUE;
1801             continue;
1802         case 'Z':   /* Convert X0208 alphabet to asii */
1803             /* alpha_f
1804                bit:0   Convert JIS X 0208 Alphabet to ASCII
1805                bit:1   Convert Kankaku to one space
1806                bit:2   Convert Kankaku to two spaces
1807                bit:3   Convert HTML Entity
1808                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1809             */
1810             while ('0'<= *cp && *cp <='9') {
1811                 alpha_f |= 1 << (*cp++ - '0');
1812             }
1813             if (!alpha_f) alpha_f = 1;
1814             continue;
1815         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1816             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1817             /* accept  X0201
1818                     ESC-(-I     in JIS, EUC, MS Kanji
1819                     SI/SO       in JIS, EUC, MS Kanji
1820                     SSO         in EUC, JIS, not in MS Kanji
1821                     MS Kanji (0xa0-0xdf) 
1822                output  X0201
1823                     ESC-(-I     in JIS (0x20-0x5f)
1824                     SSO         in EUC (0xa0-0xdf)
1825                     0xa0-0xd    in MS Kanji (0xa0-0xdf) 
1826             */
1827             continue;
1828         case 'X':   /* Assume X0201 kana */
1829             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1830             x0201_f = TRUE;
1831             continue;
1832         case 'F':   /* prserve new lines */
1833             fold_preserve_f = TRUE;
1834         case 'f':   /* folding -f60 or -f */
1835             fold_f = TRUE;
1836             fold_len = 0;
1837             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1838                 fold_len *= 10;
1839                 fold_len += *cp++ - '0';
1840             }
1841             if (!(0<fold_len && fold_len<BUFSIZ)) 
1842                 fold_len = DEFAULT_FOLD;
1843             if (*cp=='-') {
1844                 fold_margin = 0;
1845                 cp++;
1846                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1847                     fold_margin *= 10;
1848                     fold_margin += *cp++ - '0';
1849                 }
1850             }
1851             continue;
1852         case 'm':   /* MIME support */
1853             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1854             if (*cp=='B'||*cp=='Q') {
1855                 mime_decode_mode = *cp++;
1856                 mimebuf_f = FIXED_MIME;
1857             } else if (*cp=='N') {
1858                 mime_f = TRUE; cp++;
1859             } else if (*cp=='S') {
1860                 mime_f = STRICT_MIME; cp++;
1861             } else if (*cp=='0') {
1862                 mime_decode_f = FALSE;
1863                 mime_f = FALSE; cp++;
1864             }
1865             continue;
1866         case 'M':   /* MIME output */
1867             if (*cp=='B') {
1868                 mimeout_mode = 'B';
1869                 mimeout_f = FIXED_MIME; cp++;
1870             } else if (*cp=='Q') {
1871                 mimeout_mode = 'Q';
1872                 mimeout_f = FIXED_MIME; cp++;
1873             } else {
1874                 mimeout_f = TRUE;
1875             }
1876             continue;
1877         case 'B':   /* Broken JIS support */
1878             /*  bit:0   no ESC JIS
1879                 bit:1   allow any x on ESC-(-x or ESC-$-x
1880                 bit:2   reset to ascii on NL
1881             */
1882             if ('9'>= *cp && *cp>='0') 
1883                 broken_f |= 1<<(*cp++ -'0');
1884             else 
1885                 broken_f |= TRUE;
1886             continue;
1887 #ifndef PERL_XS
1888         case 'O':/* for Output file */
1889             file_out_f = TRUE;
1890             continue;
1891 #endif
1892         case 'c':/* add cr code */
1893             crmode_f = CRLF;
1894             continue;
1895         case 'd':/* delete cr code */
1896             crmode_f = NL;
1897             continue;
1898         case 'I':   /* ISO-2022-JP output */
1899             iso2022jp_f = TRUE;
1900             continue;
1901         case 'L':  /* line mode */
1902             if (*cp=='u') {         /* unix */
1903                 crmode_f = NL; cp++;
1904             } else if (*cp=='m') { /* mac */
1905                 crmode_f = CR; cp++;
1906             } else if (*cp=='w') { /* windows */
1907                 crmode_f = CRLF; cp++;
1908             } else if (*cp=='0') { /* no conversion  */
1909                 crmode_f = 0; cp++;
1910             }
1911             continue;
1912         case 'g':
1913 #ifndef PERL_XS
1914             guess_f = TRUE;
1915 #endif
1916             continue;
1917         case ' ':    
1918         /* module muliple options in a string are allowed for Perl moudle  */
1919             while(*cp && *cp++!='-');
1920             continue;
1921         default:
1922             /* bogus option but ignored */
1923             continue;
1924         }
1925     }
1926 }
1927
1928 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1929 {
1930     if (iconv_func){
1931         struct input_code *p = input_code_list;
1932         while (p->name){
1933             if (iconv_func == p->iconv_func){
1934                 return p;
1935             }
1936             p++;
1937         }
1938     }
1939     return 0;
1940 }
1941
1942 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1943 {
1944 #ifdef INPUT_CODE_FIX
1945     if (f || !input_f)
1946 #endif
1947         if (estab_f != f){
1948             estab_f = f;
1949         }
1950
1951     if (iconv_func
1952 #ifdef INPUT_CODE_FIX
1953         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1954 #endif
1955         ){
1956         iconv = iconv_func;
1957     }
1958 #ifdef CHECK_OPTION
1959     if (estab_f && iconv_for_check != iconv){
1960         struct input_code *p = find_inputcode_byfunc(iconv);
1961         if (p){
1962             set_input_codename(p->name);
1963             debug(input_codename);
1964         }
1965         iconv_for_check = iconv;
1966     }
1967 #endif
1968 }
1969
/*
 * Flag bits OR-ed into input_code.score by set_code_score().
 * Each flag occupies its own bit; SCORE_CP932 only exists (and shifts the
 * following flags up by one bit) when SHIFTJIS_CP932 is defined.
 */
#define SCORE_L2       (1 << 0)              /* JIS level-2 kanji */
#define SCORE_KANA     (1 << 1)              /* so-called half-width kana */
#define SCORE_DEPEND   (1 << 2)              /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (1 << 3)              /* character replaced via CP932 */
#define SCORE_NO_EXIST (1 << 4)              /* nonexistent character */
#else
#define SCORE_NO_EXIST (1 << 3)              /* nonexistent character */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score every detector starts from after status_reset(). */
#define SCORE_INIT (SCORE_iMIME)

/* Looked up by code_score() with (c2 & 0x0f) when (c2 & 0x70) == 0x20. */
static const char score_table_A0[] = {
    0,                /* 0x0 */
    0,                /* 0x1 */
    0,                /* 0x2 */
    0,                /* 0x3 */
    0,                /* 0x4 */
    0,                /* 0x5 */
    0,                /* 0x6 */
    0,                /* 0x7 */
    0,                /* 0x8 */
    SCORE_DEPEND,     /* 0x9 */
    SCORE_DEPEND,     /* 0xa */
    SCORE_DEPEND,     /* 0xb */
    SCORE_DEPEND,     /* 0xc */
    SCORE_DEPEND,     /* 0xd */
    SCORE_DEPEND,     /* 0xe */
    SCORE_NO_EXIST,   /* 0xf */
};

/* Looked up by code_score() with (c2 & 0x0f) when (c2 & 0x70) == 0x70. */
static const char score_table_F0[] = {
    SCORE_L2,         /* 0x0 */
    SCORE_L2,         /* 0x1 */
    SCORE_L2,         /* 0x2 */
    SCORE_L2,         /* 0x3 */
    SCORE_L2,         /* 0x4 */
    SCORE_DEPEND,     /* 0x5 */
    SCORE_NO_EXIST,   /* 0x6 */
    SCORE_NO_EXIST,   /* 0x7 */
    SCORE_DEPEND,     /* 0x8 */
    SCORE_DEPEND,     /* 0x9 */
    SCORE_DEPEND,     /* 0xa */
    SCORE_DEPEND,     /* 0xb */
    SCORE_DEPEND,     /* 0xc */
    SCORE_NO_EXIST,   /* 0xd */
    SCORE_NO_EXIST,   /* 0xe */
    SCORE_ERROR,      /* 0xf */
};
1997
1998 void set_code_score(struct input_code *ptr, nkf_char score)
1999 {
2000     if (ptr){
2001         ptr->score |= score;
2002     }
2003 }
2004
2005 void clr_code_score(struct input_code *ptr, nkf_char score)
2006 {
2007     if (ptr){
2008         ptr->score &= ~score;
2009     }
2010 }
2011
2012 void code_score(struct input_code *ptr)
2013 {
2014     nkf_char c2 = ptr->buf[0];
2015 #ifdef UTF8_OUTPUT_ENABLE
2016     nkf_char c1 = ptr->buf[1];
2017 #endif
2018     if (c2 < 0){
2019         set_code_score(ptr, SCORE_ERROR);
2020     }else if (c2 == SSO){
2021         set_code_score(ptr, SCORE_KANA);
2022 #ifdef UTF8_OUTPUT_ENABLE
2023     }else if (!e2w_conv(c2, c1)){
2024         set_code_score(ptr, SCORE_NO_EXIST);
2025 #endif
2026     }else if ((c2 & 0x70) == 0x20){
2027         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2028     }else if ((c2 & 0x70) == 0x70){
2029         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2030     }else if ((c2 & 0x70) >= 0x50){
2031         set_code_score(ptr, SCORE_L2);
2032     }
2033 }
2034
2035 void status_disable(struct input_code *ptr)
2036 {
2037     ptr->stat = -1;
2038     ptr->buf[0] = -1;
2039     code_score(ptr);
2040     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2041 }
2042
2043 void status_push_ch(struct input_code *ptr, nkf_char c)
2044 {
2045     ptr->buf[ptr->index++] = c;
2046 }
2047
2048 void status_clear(struct input_code *ptr)
2049 {
2050     ptr->stat = 0;
2051     ptr->index = 0;
2052 }
2053
2054 void status_reset(struct input_code *ptr)
2055 {
2056     status_clear(ptr);
2057     ptr->score = SCORE_INIT;
2058 }
2059
2060 void status_reinit(struct input_code *ptr)
2061 {
2062     status_reset(ptr);
2063     ptr->_file_stat = 0;
2064 }
2065
2066 void status_check(struct input_code *ptr, nkf_char c)
2067 {
2068     if (c <= DEL && estab_f){
2069         status_reset(ptr);
2070     }
2071 }
2072
2073 void s_status(struct input_code *ptr, nkf_char c)
2074 {
2075     switch(ptr->stat){
2076       case -1:
2077           status_check(ptr, c);
2078           break;
2079       case 0:
2080           if (c <= DEL){
2081               break;
2082 #ifdef NUMCHAR_OPTION
2083           }else if (is_unicode_capsule(c)){
2084               break;
2085 #endif
2086           }else if (0xa1 <= c && c <= 0xdf){
2087               status_push_ch(ptr, SSO);
2088               status_push_ch(ptr, c);
2089               code_score(ptr);
2090               status_clear(ptr);
2091           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2092               ptr->stat = 1;
2093               status_push_ch(ptr, c);
2094 #ifdef SHIFTJIS_CP932
2095           }else if (cp51932_f
2096                     && is_ibmext_in_sjis(c)){
2097               ptr->stat = 2;
2098               status_push_ch(ptr, c);
2099 #endif /* SHIFTJIS_CP932 */
2100 #ifdef X0212_ENABLE
2101           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2102               ptr->stat = 1;
2103               status_push_ch(ptr, c);
2104 #endif /* X0212_ENABLE */
2105           }else{
2106               status_disable(ptr);
2107           }
2108           break;
2109       case 1:
2110           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2111               status_push_ch(ptr, c);
2112               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2113               code_score(ptr);
2114               status_clear(ptr);
2115           }else{
2116               status_disable(ptr);
2117           }
2118           break;
2119       case 2:
2120 #ifdef SHIFTJIS_CP932
2121           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2122               status_push_ch(ptr, c);
2123               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2124                   set_code_score(ptr, SCORE_CP932);
2125                   status_clear(ptr);
2126                   break;
2127               }
2128           }
2129 #endif /* SHIFTJIS_CP932 */
2130 #ifndef X0212_ENABLE
2131           status_disable(ptr);
2132 #endif
2133           break;
2134     }
2135 }
2136
2137 void e_status(struct input_code *ptr, nkf_char c)
2138 {
2139     switch (ptr->stat){
2140       case -1:
2141           status_check(ptr, c);
2142           break;
2143       case 0:
2144           if (c <= DEL){
2145               break;
2146 #ifdef NUMCHAR_OPTION
2147           }else if (is_unicode_capsule(c)){
2148               break;
2149 #endif
2150           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2151               ptr->stat = 1;
2152               status_push_ch(ptr, c);
2153 #ifdef X0212_ENABLE
2154           }else if (0x8f == c){
2155               ptr->stat = 2;
2156               status_push_ch(ptr, c);
2157 #endif /* X0212_ENABLE */
2158           }else{
2159               status_disable(ptr);
2160           }
2161           break;
2162       case 1:
2163           if (0xa1 <= c && c <= 0xfe){
2164               status_push_ch(ptr, c);
2165               code_score(ptr);
2166               status_clear(ptr);
2167           }else{
2168               status_disable(ptr);
2169           }
2170           break;
2171 #ifdef X0212_ENABLE
2172       case 2:
2173           if (0xa1 <= c && c <= 0xfe){
2174               ptr->stat = 1;
2175               status_push_ch(ptr, c);
2176           }else{
2177               status_disable(ptr);
2178           }
2179 #endif /* X0212_ENABLE */
2180     }
2181 }
2182
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: advance the state machine in ptr by one input byte c.
 *
 * States (ptr->stat):
 *   -1  disabled; only status_check() may revive it
 *    0  idle, waiting for a lead byte
 *    1  lead byte 0xc0-0xdf stored
 *    2  lead byte 0xe0-0xef stored
 *    3  lead byte 0xf0-0xf4 stored
 *
 * In states 1 and 2 the sequence is complete once more bytes than the
 * state number have been stored; it is then converted with w2e_conv() and
 * scored, except that the byte sequence EF BB BF is not scored.  In state
 * 3 the continuation bytes are only counted and the sequence is dropped
 * without conversion.  A byte outside 0x80-0xbf in states 1-3 disables
 * the detector.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL){
              break;
          }
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c)){
              break;
          }
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index > ptr->stat){
              int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                         && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!bom){
                  code_score(ptr);
              }
              status_clear(ptr);
          }
          break;

      case 3:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              status_clear(ptr);
          }
          break;
    }
}
#endif
2242
2243 void code_status(nkf_char c)
2244 {
2245     int action_flag = 1;
2246     struct input_code *result = 0;
2247     struct input_code *p = input_code_list;
2248     while (p->name){
2249         if (!p->status_func) {
2250             ++p;
2251             continue;
2252         }
2253         if (!p->status_func)
2254             continue;
2255         (p->status_func)(p, c);
2256         if (p->stat > 0){
2257             action_flag = 0;
2258         }else if(p->stat == 0){
2259             if (result){
2260                 action_flag = 0;
2261             }else{
2262                 result = p;
2263             }
2264         }
2265         ++p;
2266     }
2267
2268     if (action_flag){
2269         if (result && !estab_f){
2270             set_iconv(TRUE, result->iconv_func);
2271         }else if (c <= DEL){
2272             struct input_code *ptr = input_code_list;
2273             while (ptr->name){
2274                 status_reset(ptr);
2275                 ++ptr;
2276             }
2277         }
2278     }
2279 }
2280
2281 #ifndef WIN32DLL
2282 nkf_char std_getc(FILE *f)
2283 {
2284     if (std_gc_ndx){
2285         return std_gc_buf[--std_gc_ndx];
2286     }
2287     return getc(f);
2288 }
2289 #endif /*WIN32DLL*/
2290
2291 nkf_char std_ungetc(nkf_char c, FILE *f)
2292 {
2293     if (std_gc_ndx == STD_GC_BUFSIZE){
2294         return EOF;
2295     }
2296     std_gc_buf[std_gc_ndx++] = c;
2297     return c;
2298 }
2299
2300 #ifndef WIN32DLL
2301 void std_putc(nkf_char c)
2302 {
2303     if(c!=EOF)
2304       putchar(c);
2305 }
2306 #endif /*WIN32DLL*/
2307
2308 #if !defined(PERL_XS) && !defined(WIN32DLL)
2309 nkf_char noconvert(FILE *f)
2310 {
2311     nkf_char    c;
2312
2313     if (nop_f == 2)
2314         module_connection();
2315     while ((c = (*i_getc)(f)) != EOF)
2316       (*o_putc)(c);
2317     (*o_putc)(EOF);
2318     return 1;
2319 }
2320 #endif
2321
2322 void module_connection(void)
2323 {
2324     oconv = output_conv; 
2325     o_putc = std_putc;
2326
2327     /* replace continucation module, from output side */
2328
2329     /* output redicrection */
2330 #ifdef CHECK_OPTION
2331     if (noout_f || guess_f){
2332         o_putc = no_putc;
2333     }
2334 #endif
2335     if (mimeout_f) {
2336         o_mputc = o_putc;
2337         o_putc = mime_putc;
2338         if (mimeout_f == TRUE) {
2339             o_base64conv = oconv; oconv = base64_conv;
2340         }
2341         /* base64_count = 0; */
2342     }
2343
2344     if (crmode_f) {
2345         o_crconv = oconv; oconv = cr_conv;
2346     }
2347     if (rot_f) {
2348         o_rot_conv = oconv; oconv = rot_conv;
2349     }
2350     if (iso2022jp_f) {
2351         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2352     }
2353     if (hira_f) {
2354         o_hira_conv = oconv; oconv = hira_conv;
2355     }
2356     if (fold_f) {
2357         o_fconv = oconv; oconv = fold_conv;
2358         f_line = 0;
2359     }
2360     if (alpha_f || x0201_f) {
2361         o_zconv = oconv; oconv = z_conv;
2362     }
2363
2364     i_getc = std_getc;
2365     i_ungetc = std_ungetc;
2366     /* input redicrection */
2367 #ifdef INPUT_OPTION
2368     if (cap_f){
2369         i_cgetc = i_getc; i_getc = cap_getc;
2370         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2371     }
2372     if (url_f){
2373         i_ugetc = i_getc; i_getc = url_getc;
2374         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2375     }
2376 #endif
2377 #ifdef NUMCHAR_OPTION
2378     if (numchar_f){
2379         i_ngetc = i_getc; i_getc = numchar_getc;
2380         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2381     }
2382 #endif
2383 #ifdef UNICODE_NORMALIZATION
2384     if (nfc_f && input_f == UTF8_INPUT){
2385         i_nfc_getc = i_getc; i_getc = nfc_getc;
2386         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2387     }
2388 #endif
2389     if (mime_f && mimebuf_f==FIXED_MIME) {
2390         i_mgetc = i_getc; i_getc = mime_getc;
2391         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2392     }
2393     if (broken_f & 1) {
2394         i_bgetc = i_getc; i_getc = broken_getc;
2395         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2396     }
2397     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2398         set_iconv(-TRUE, e_iconv);
2399     } else if (input_f == SJIS_INPUT) {
2400         set_iconv(-TRUE, s_iconv);
2401 #ifdef UTF8_INPUT_ENABLE
2402     } else if (input_f == UTF8_INPUT) {
2403         set_iconv(-TRUE, w_iconv);
2404     } else if (input_f == UTF16_INPUT) {
2405         set_iconv(-TRUE, w_iconv16);
2406     } else if (input_f == UTF32_INPUT) {
2407         set_iconv(-TRUE, w_iconv32);
2408 #endif
2409     } else {
2410         set_iconv(FALSE, e_iconv);
2411     }
2412
2413     {
2414         struct input_code *p = input_code_list;
2415         while (p->name){
2416             status_reinit(p++);
2417         }
2418     }
2419 }
2420
2421 /*
2422  * Check and Ignore BOM
2423  */
2424 void check_bom(FILE *f)
2425 {
2426     int c2;
2427     switch(c2 = (*i_getc)(f)){
2428     case 0x00:
2429         if((c2 = (*i_getc)(f)) == 0x00){
2430             if((c2 = (*i_getc)(f)) == 0xFE){
2431                 if((c2 = (*i_getc)(f)) == 0xFF){
2432                     if(!input_f){
2433                         set_iconv(TRUE, w_iconv32);
2434                     }
2435                     if (iconv == w_iconv32) {
2436                         input_endian = ENDIAN_BIG;
2437                         return;
2438                     }
2439                     (*i_ungetc)(0xFF,f);
2440                 }else (*i_ungetc)(c2,f);
2441                 (*i_ungetc)(0xFE,f);
2442             }else if(c2 == 0xFF){
2443                 if((c2 = (*i_getc)(f)) == 0xFE){
2444                     if(!input_f){
2445                         set_iconv(TRUE, w_iconv32);
2446                     }
2447                     if (iconv == w_iconv32) {
2448                         input_endian = ENDIAN_2143;
2449                         return;
2450                     }
2451                     (*i_ungetc)(0xFF,f);
2452                 }else (*i_ungetc)(c2,f);
2453                 (*i_ungetc)(0xFF,f);
2454             }else (*i_ungetc)(c2,f);
2455             (*i_ungetc)(0x00,f);
2456         }else (*i_ungetc)(c2,f);
2457         (*i_ungetc)(0x00,f);
2458         break;
2459     case 0xEF:
2460         if((c2 = (*i_getc)(f)) == 0xBB){
2461             if((c2 = (*i_getc)(f)) == 0xBF){
2462                 if(!input_f){
2463                     set_iconv(TRUE, w_iconv);
2464                 }
2465                 if (iconv == w_iconv) {
2466                     return;
2467                 }
2468                 (*i_ungetc)(0xBF,f);
2469             }else (*i_ungetc)(c2,f);
2470             (*i_ungetc)(0xBB,f);
2471         }else (*i_ungetc)(c2,f);
2472         (*i_ungetc)(0xEF,f);
2473         break;
2474     case 0xFE:
2475         if((c2 = (*i_getc)(f)) == 0xFF){
2476             if((c2 = (*i_getc)(f)) == 0x00){
2477                 if((c2 = (*i_getc)(f)) == 0x00){
2478                     if(!input_f){
2479                         set_iconv(TRUE, w_iconv32);
2480                     }
2481                     if (iconv == w_iconv32) {
2482                         input_endian = ENDIAN_3412;
2483                         return;
2484                     }
2485                     (*i_ungetc)(0x00,f);
2486                 }else (*i_ungetc)(c2,f);
2487                 (*i_ungetc)(0x00,f);
2488             }else (*i_ungetc)(c2,f);
2489             if(!input_f){
2490                 set_iconv(TRUE, w_iconv16);
2491             }
2492             if (iconv == w_iconv16) {
2493                 input_endian = ENDIAN_BIG;
2494                 return;
2495             }
2496             (*i_ungetc)(0xFF,f);
2497         }else (*i_ungetc)(c2,f);
2498         (*i_ungetc)(0xFE,f);
2499         break;
2500     case 0xFF:
2501         if((c2 = (*i_getc)(f)) == 0xFE){
2502             if((c2 = (*i_getc)(f)) == 0x00){
2503                 if((c2 = (*i_getc)(f)) == 0x00){
2504                     if(!input_f){
2505                         set_iconv(TRUE, w_iconv32);
2506                     }
2507                     if (iconv == w_iconv32) {
2508                         input_endian = ENDIAN_LITTLE;
2509                         return;
2510                     }
2511                     (*i_ungetc)(0x00,f);
2512                 }else (*i_ungetc)(c2,f);
2513                 (*i_ungetc)(0x00,f);
2514             }else (*i_ungetc)(c2,f);
2515             if(!input_f){
2516                 set_iconv(TRUE, w_iconv16);
2517             }
2518             if (iconv == w_iconv16) {
2519                 input_endian = ENDIAN_LITTLE;
2520                 return;
2521             }
2522             (*i_ungetc)(0xFE,f);
2523         }else (*i_ungetc)(c2,f);
2524         (*i_ungetc)(0xFF,f);
2525         break;
2526     default:
2527         (*i_ungetc)(c2,f);
2528         break;
2529     }
2530 }
2531
2532 /*
2533    Conversion main loop. Code detection only. 
2534  */
2535
/*
 * kanji_convert: main input loop.
 *
 * Reads characters from f through (*i_getc) until EOF, groups them into
 * (c2, c1, c0) units and hands each unit either to the current input
 * converter (*iconv) (when input_mode is ASCII) or directly to (*oconv)
 * (when an ISO-2022 style escape sequence selected another input_mode).
 *
 *   c2 : pending first byte of a multi-byte character (0 when none)
 *   c1 : byte just read
 *   c0 : extra trailing byte(s) for 3/4 byte sequences and surrogates
 *
 * Along the way it handles SI/SO shifts, ESC sequences, MIME header
 * detection and line-end bookkeeping.  After the loop it flushes the
 * converter with (*iconv)(EOF, 0, 0) and, when no input code name was set
 * and an 8-bit input was forced, records the candidate with the lowest
 * score from input_code_list.
 *
 * Returns 1 at the end of input.
 */
2536 nkf_char kanji_convert(FILE *f)
2537 {
2538     nkf_char    c3, c2=0, c1, c0=0;
2539     int is_8bit = FALSE;
2540
         /* An explicitly requested 8-bit input encoding: SI/SO/ESC are then
            only interpreted while MIME decoding is active (see below). */
2541     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2542 #ifdef UTF8_INPUT_ENABLE
2543        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2544 #endif
2545       ){
2546         is_8bit = TRUE;
2547     }
2548
2549     input_mode = ASCII;
2550     output_mode = ASCII;
2551     shift_mode = FALSE;
2552
         /* SEND expands to an empty statement: control simply falls out of
            the if/else ladder into the "send:" switch at the bottom of the
            loop body.  NEXT skips that switch, LAST leaves the loop. */
2553 #define NEXT continue      /* no output, get next */
2554 #define SEND ;             /* output c1 and c2, get next */
2555 #define LAST break         /* end of loop, go closing  */
2556
2557     module_connection();
2558     check_bom(f);
2559
2560     while ((c1 = (*i_getc)(f)) != EOF) {
2561 #ifdef INPUT_CODE_FIX
2562         if (!input_f)
2563 #endif
2564             code_status(c1);
2565         if (c2) {
2566             /* second byte */
2567             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2568                 /* in case of 8th bit is on */
2569                 if (!estab_f&&!mime_decode_mode) {
2570                     /* in case of not established yet */
2571                     /* It is still ambiguous */
2572                     if (h_conv(f, c2, c1)==EOF) 
2573                         LAST;
2574                     else 
2575                         c2 = 0;
2576                     NEXT;
2577                 } else {
2578                     /* in case of already established */
2579                     if (c1 < AT) {
2580                         /* ignore bogus code and not CP5022x UCD */
2581                         c2 = 0;
2582                         NEXT;
2583                     } else {
2584                         SEND;
2585                     }
2586                 }
2587             } else
2588                 /* second byte, 7 bit code */
2589                 /* it might be kanji shifted */
2590                 if ((c1 == DEL) || (c1 <= SPACE)) {
2591                     /* ignore bogus first code */
2592                     c2 = 0;
2593                     NEXT;
2594                 } else
2595                     SEND;
2596         } else {
2597             /* first byte */
2598 #ifdef UTF8_INPUT_ENABLE
2599             if (iconv == w_iconv16) {
                     /* UTF-16: read the second byte of the code unit; when the
                        high byte is 0xD8..0xDB (lead surrogate) read two more
                        bytes and pack them into c0.  A premature EOF is
                        reported to the converter as c2 = EOF. */
2600                 if (input_endian == ENDIAN_BIG) {
2601                     c2 = c1;
2602                     if ((c1 = (*i_getc)(f)) != EOF) {
2603                         if (0xD8 <= c2 && c2 <= 0xDB) {
2604                             if ((c0 = (*i_getc)(f)) != EOF) {
2605                                 c0 <<= 8;
2606                                 if ((c3 = (*i_getc)(f)) != EOF) {
2607                                     c0 |= c3;
2608                                 } else c2 = EOF;
2609                             } else c2 = EOF;
2610                         }
2611                     } else c2 = EOF;
2612                 } else {
2613                     if ((c2 = (*i_getc)(f)) != EOF) {
2614                         if (0xD8 <= c2 && c2 <= 0xDB) {
2615                             if ((c3 = (*i_getc)(f)) != EOF) {
2616                                 if ((c0 = (*i_getc)(f)) != EOF) {
2617                                     c0 <<= 8;
2618                                     c0 |= c3;
2619                                 } else c2 = EOF;
2620                             } else c2 = EOF;
2621                         }
2622                     } else c2 = EOF;
2623                 }
2624                 SEND;
2625             } else if(iconv == w_iconv32){
                     /* UTF-32: read three more bytes and combine three of the
                        four bytes into c1 according to input_endian.
                        NOTE(review): this local c3 shadows the outer c3. */
2626                 int c3 = c1;
2627                 if((c2 = (*i_getc)(f)) != EOF &&
2628                    (c1 = (*i_getc)(f)) != EOF &&
2629                    (c0 = (*i_getc)(f)) != EOF){
2630                     switch(input_endian){
2631                     case ENDIAN_BIG:
2632                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2633                         break;
2634                     case ENDIAN_LITTLE:
2635                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2636                         break;
2637                     case ENDIAN_2143:
2638                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2639                         break;
2640                     case ENDIAN_3412:
2641                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2642                         break;
2643                     }
2644                     c2 = 0;
2645                 }else{
2646                     c2 = EOF;
2647                 }
2648                 SEND;
2649             } else
2650 #endif
2651 #ifdef NUMCHAR_OPTION
2652             if (is_unicode_capsule(c1)){
2653                 SEND;
2654             } else
2655 #endif
2656             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2657                 /* 8 bit code */
2658                 if (!estab_f && !iso8859_f) {
2659                     /* not established yet */
2660                     c2 = c1;
2661                     NEXT;
2662                 } else { /* estab_f==TRUE */
2663                     if (iso8859_f) {
2664                         c2 = ISO8859_1;
2665                         c1 &= 0x7f;
2666                         SEND;
2667                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2668                         /* SJIS X0201 Case... */
2669                         if(iso2022jp_f && x0201_f==NO_X0201) {
2670                             (*oconv)(GETA1, GETA2);
2671                             NEXT;
2672                         } else {
2673                             c2 = X0201;
2674                             c1 &= 0x7f;
2675                             SEND;
2676                         }
2677                     } else if (c1==SSO && iconv != s_iconv) {
2678                         /* EUC X0201 Case */
2679                         c1 = (*i_getc)(f);  /* skip SSO */
2680                         code_status(c1);
2681                         if (SSP<=c1 && c1<0xe0) {
2682                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2683                                 (*oconv)(GETA1, GETA2);
2684                                 NEXT;
2685                             } else {
2686                                 c2 = X0201;
2687                                 c1 &= 0x7f;
2688                                 SEND;
2689                             }
2690                         } else  { /* bogus code, skip SSO and one byte */
2691                             NEXT;
2692                         }
2693                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2694                                (c1 == 0xFD || c1 == 0xFE)) {
2695                         /* CP10001 */
2696                         c2 = X0201;
2697                         c1 &= 0x7f;
2698                         SEND;
2699                     } else {
2700                        /* already established */
2701                        c2 = c1;
2702                        NEXT;
2703                     }
2704                 }
2705             } else if ((c1 > SPACE) && (c1 != DEL)) {
2706                 /* in case of Roman characters */
2707                 if (shift_mode) { 
2708                     /* output 1 shifted byte */
2709                     if (iso8859_f) {
2710                         c2 = ISO8859_1;
2711                         SEND;
2712                     } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2713                       /* output 1 shifted byte */
2714                         if(iso2022jp_f && x0201_f==NO_X0201) {
2715                             (*oconv)(GETA1, GETA2);
2716                             NEXT;
2717                         } else {
2718                             c2 = X0201;
2719                             SEND;
2720                         }
2721                     } else {
2722                         /* look like bogus code */
2723                         NEXT;
2724                     }
2725                 } else if (input_mode == X0208 || input_mode == X0212 ||
2726                            input_mode == X0213_1 || input_mode == X0213_2) {
2727                     /* in case of Kanji shifted */
2728                     c2 = c1;
2729                     NEXT;
2730                 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2731                     /* Check MIME code */
2732                     if ((c1 = (*i_getc)(f)) == EOF) {
2733                         (*oconv)(0, '=');
2734                         LAST;
2735                     } else if (c1 == '?') {
2736                         /* =? is mime conversion start sequence */
2737                         if(mime_f == STRICT_MIME) {
2738                             /* check in real detail */
2739                             if (mime_begin_strict(f) == EOF) 
2740                                 LAST;
2741                             else
2742                                 NEXT;
2743                         } else if (mime_begin(f) == EOF) 
2744                             LAST;
2745                         else
2746                             NEXT;
2747                     } else {
                             /* plain '=': emit it and re-read the following byte */
2748                         (*oconv)(0, '=');
2749                         (*i_ungetc)(c1,f);
2750                         NEXT;
2751                     }
2752                 } else {
2753                     /* normal ASCII code */ 
2754                     SEND;
2755                 }
2756             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {\r
2757                 shift_mode = FALSE; 
2758                 NEXT;
2759             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {\r
2760                 shift_mode = TRUE; 
2761                 NEXT;
2762             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {\r
2763                 if ((c1 = (*i_getc)(f)) == EOF) {
2764                     /*  (*oconv)(0, ESC); don't send bogus code */
2765                     LAST;
2766                 } else if (c1 == '$') {
2767                     if ((c1 = (*i_getc)(f)) == EOF) {
2768                         /*
2769                         (*oconv)(0, ESC); don't send bogus code 
2770                         (*oconv)(0, '$'); */
2771                         LAST;
2772                     } else if (c1 == '@'|| c1 == 'B') {
2773                         /* This is kanji introduction */
2774                         input_mode = X0208;
2775                         shift_mode = FALSE;
2776                         set_input_codename("ISO-2022-JP");
2777 #ifdef CHECK_OPTION
2778                         debug(input_codename);
2779 #endif
2780                         NEXT;
2781                     } else if (c1 == '(') {
2782                         if ((c1 = (*i_getc)(f)) == EOF) {
2783                             /* don't send bogus code 
2784                             (*oconv)(0, ESC);
2785                             (*oconv)(0, '$');
2786                             (*oconv)(0, '(');
2787                                 */
2788                             LAST;
2789                         } else if (c1 == '@'|| c1 == 'B') {
2790                             /* This is kanji introduction */
2791                             input_mode = X0208;
2792                             shift_mode = FALSE;
2793                             NEXT;
2794 #ifdef X0212_ENABLE
2795                         } else if (c1 == 'D'){
2796                             input_mode = X0212;
2797                             shift_mode = FALSE;
2798                             NEXT;
2799 #endif /* X0212_ENABLE */
2800                         } else if (c1 == (X0213_1&0x7F)){
2801                             input_mode = X0213_1;
2802                             shift_mode = FALSE;
2803                             NEXT;
2804                         } else if (c1 == (X0213_2&0x7F)){
2805                             input_mode = X0213_2;
2806                             shift_mode = FALSE;
2807                             NEXT;
2808                         } else {
2809                             /* could be some special code */
2810                             (*oconv)(0, ESC);
2811                             (*oconv)(0, '$');
2812                             (*oconv)(0, '(');
2813                             (*oconv)(0, c1);
2814                             NEXT;
2815                         }
2816                     } else if (broken_f&0x2) {
2817                         /* accept any ESC-$-x as broken code ... */
2818                         input_mode = X0208;
2819                         shift_mode = FALSE;
2820                         NEXT;
2821                     } else {
2822                         (*oconv)(0, ESC);
2823                         (*oconv)(0, '$');
2824                         (*oconv)(0, c1);
2825                         NEXT;
2826                     }
2827                 } else if (c1 == '(') {
2828                     if ((c1 = (*i_getc)(f)) == EOF) {
2829                         /* don't send bogus code 
2830                         (*oconv)(0, ESC);
2831                         (*oconv)(0, '('); */
2832                         LAST;
2833                     } else {
2834                         if (c1 == 'I') {
2835                             /* This is X0201 kana introduction */
2836                             input_mode = X0201; shift_mode = X0201;
2837                             NEXT;
2838                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2839                             /* ESC ( B / J / H: back to single-byte (ASCII) input mode */
2840                             input_mode = ASCII; shift_mode = FALSE;
2841                             NEXT;
2842                         } else if (broken_f&0x2) {
                                 /* broken input: treat any other ESC-(-x the same way */
2843                             input_mode = ASCII; shift_mode = FALSE;
2844                             NEXT;
2845                         } else {
2846                             (*oconv)(0, ESC);
2847                             (*oconv)(0, '(');
2848                             /* maintain various input_mode here */
2849                             SEND;
2850                         }
2851                     }
2852                } else if ( c1 == 'N' || c1 == 'n' ){
2853                    /* SS2 */
2854                    c3 = (*i_getc)(f);  /* skip SS2 */
2855                    if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2856                        c1 = c3;
2857                        c2 = X0201;
2858                        SEND;
2859                    }else{
2860                        (*i_ungetc)(c3, f);
2861                        /* lonely ESC  */
2862                        (*oconv)(0, ESC);
2863                        SEND;
2864                    }
2865                 } else {
2866                     /* lonely ESC  */
2867                     (*oconv)(0, ESC);
2868                     SEND;
2869                 }
2870             } else if (c1 == ESC && iconv == s_iconv) {
2871                 /* ESC in Shift_JIS */
2872                 if ((c1 = (*i_getc)(f)) == EOF) {
2873                     /*  (*oconv)(0, ESC); don't send bogus code */
2874                     LAST;
2875                 } else if (c1 == '$') {
2876                     /* J-PHONE emoji */
2877                     if ((c1 = (*i_getc)(f)) == EOF) {
2878                         /*
2879                            (*oconv)(0, ESC); don't send bogus code 
2880                            (*oconv)(0, '$'); */
2881                         LAST;
2882                     } else {
2883                         if (('E' <= c1 && c1 <= 'G') ||
2884                             ('O' <= c1 && c1 <= 'Q')) {
2885                             /*
2886                                NUM : 0 1 2 3 4 5
2887                                BYTE: G E F O P Q
2888                                C%7 : 1 6 0 2 3 4
2889                                C%7 : 0 1 2 3 4 5 6
2890                                NUM : 2 0 3 4 5 X 1
2891                              */
2892                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                                 /* c0 becomes the base value added to every following
                                    byte in SPACE..'z'; the run ends at the first byte
                                    outside that range (or at EOF). */
2893                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SPACE + 0xE000 + CLASS_UNICODE;
2894                             while ((c1 = (*i_getc)(f)) != EOF) {
2895                                 if (SPACE <= c1 && c1 <= 'z') {
2896                                     (*oconv)(0, c1 + c0);
2897                                 } else break; /* c1 == SO */
2898                             }
2899                         }
2900                     }
2901                     if (c1 == EOF) LAST;
2902                     NEXT;
2903                 } else {
2904                     /* lonely ESC  */
2905                     (*oconv)(0, ESC);
2906                     SEND;
2907                 }
2908             } else if (c1 == NL || c1 == CR) {
2909                 if (broken_f&4) {
2910                     input_mode = ASCII; set_iconv(FALSE, 0);
2911                     SEND;
2912                 } else if (mime_decode_f && !mime_decode_mode){
                         /* While scanning for MIME words: a line end that is
                            followed by SPACE is dropped (the SPACE is pushed back
                            and the loop continues); otherwise the line end is
                            sent as usual. */
2913                     if (c1 == NL) {
2914                         if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2915                             i_ungetc(SPACE,f);
2916                             continue;
2917                         } else {
2918                             i_ungetc(c1,f);
2919                         }
2920                         c1 = NL;
2921                         SEND;
2922                     } else  { /* if (c1 == CR)*/
2923                         if ((c1=(*i_getc)(f))!=EOF) {
2924                             if (c1==SPACE) {
2925                                 i_ungetc(SPACE,f);
2926                                 continue;
2927                             } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2928                                 i_ungetc(SPACE,f);
2929                                 continue;
2930                             } else {
2931                                 i_ungetc(c1,f);
2932                             }
                                 /* NOTE(review): this NL is pushed back even when the
                                    byte after CR was not NL - confirm that is intended. */
2933                             i_ungetc(NL,f);
2934                         } else {
2935                             i_ungetc(c1,f);
2936                         }
2937                         c1 = CR;
2938                         SEND;
2939                     }
2940                 }
                     /* remember the first line-end style seen */
2941                 if (!crmode_f) {
2942                     if (prev_cr && c1 == NL) crmode_f = CRLF;
2943                     else crmode_f = c1;
2944                 }
2945             } else if (c1 == DEL && input_mode == X0208 ) {
2946                 /* CP5022x */
2947                 c2 = c1;
2948                 NEXT;
2949             } else 
2950                 SEND;
2951         }
2952         /* send: */
2953         switch(input_mode){
2954         case ASCII:
                 /* The converter returns -1 / -2 when it needs one / two more
                    bytes; they are read here and the call is repeated with c0. */
2955             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2956             case -2:
2957                 /* 4 bytes UTF-8 */
2958                 if ((c0 = (*i_getc)(f)) != EOF) {
2959                     code_status(c0);
2960                     c0 <<= 8;
2961                     if ((c3 = (*i_getc)(f)) != EOF) {
2962                         code_status(c3);
2963                         (*iconv)(c2, c1, c0|c3);
2964                     }
2965                 }
2966                 break;
2967             case -1:
2968                 /* 3 bytes EUC or UTF-8 */
2969                 if ((c0 = (*i_getc)(f)) != EOF) {
2970                     code_status(c0);
2971                     (*iconv)(c2, c1, c0);
2972                 }
2973                 break;
2974             }
2975             break;
2976         case X0208:
2977         case X0213_1:
2978             if (ms_ucs_map_f &&
2979                 0x7F <= c2 && c2 <= 0x92 &&
2980                 0x21 <= c1 && c1 <= 0x7E) {
2981                 /* CP932 UDC */
                     /* NOTE(review): c1 == 0x7F cannot hold here because the
                        enclosing condition already requires c1 <= 0x7E. */
2982                 if(c1 == 0x7F) return 0;
2983                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2984                 c2 = 0;
2985             }
2986             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2987             break;
2988 #ifdef X0212_ENABLE
2989         case X0212:
2990             (*oconv)(PREFIX_EUCG3 | c2, c1);
2991             break;
2992 #endif /* X0212_ENABLE */
2993         case X0213_2:
2994             (*oconv)(PREFIX_EUCG3 | c2, c1);
2995             break;
2996         default:
2997             (*oconv)(input_mode, c1);  /* other special case */
2998         }
2999
             /* the unit has been consumed: clear the pending bytes */
3000         c2 = 0;
3001         c0 = 0;
3002         continue;
3003         /* goto next_word */
3004     }
3005
3006     /* epilogue */
         /* tell the input converter that the input has ended */
3007     (*iconv)(EOF, 0, 0);
3008     if (!is_inputcode_set)
3009     {
3010         if (is_8bit) {
                 /* pick the candidate with the lowest score (first one wins ties) */
3011             struct input_code *p = input_code_list;
3012             struct input_code *result = p;
3013             while (p->name){
3014                 if (p->score < result->score) result = p;
3015                 ++p;
3016             }
3017             set_input_codename(result->name);
3018         }
3019     }
3020     return 1;
3021 }
3022
3023 nkf_char
3024 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3025 {
3026     nkf_char ret, c3, c0;
3027     int hold_index;
3028
3029
3030     /** it must NOT be in the kanji shifte sequence      */
3031     /** it must NOT be written in JIS7                   */
3032     /** and it must be after 2 byte 8bit code            */
3033
3034     hold_count = 0;
3035     push_hold_buf(c2);
3036     push_hold_buf(c1);
3037
3038     while ((c1 = (*i_getc)(f)) != EOF) {
3039         if (c1 == ESC){
3040             (*i_ungetc)(c1,f);
3041             break;
3042         }
3043         code_status(c1);
3044         if (push_hold_buf(c1) == EOF || estab_f){
3045             break;
3046         }
3047     }
3048
3049     if (!estab_f){
3050         struct input_code *p = input_code_list;
3051         struct input_code *result = p;
3052         if (c1 == EOF){
3053             code_status(c1);
3054         }
3055         while (p->name){
3056             if (p->status_func && p->score < result->score){
3057                 result = p;
3058             }
3059             ++p;
3060         }
3061         set_iconv(TRUE, result->iconv_func);
3062     }
3063
3064
3065     /** now,
3066      ** 1) EOF is detected, or
3067      ** 2) Code is established, or
3068      ** 3) Buffer is FULL (but last word is pushed)
3069      **
3070      ** in 1) and 3) cases, we continue to use
3071      ** Kanji codes by oconv and leave estab_f unchanged.
3072      **/
3073
3074     ret = c1;
3075     hold_index = 0;
3076     while (hold_index < hold_count){
3077         c2 = hold_buf[hold_index++];
3078         if (c2 <= DEL
3079 #ifdef NUMCHAR_OPTION
3080             || is_unicode_capsule(c2)
3081 #endif
3082             ){
3083             (*iconv)(0, c2, 0);
3084             continue;
3085         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3086             (*iconv)(X0201, c2, 0);
3087             continue;
3088         }
3089         if (hold_index < hold_count){
3090             c1 = hold_buf[hold_index++];
3091         }else{
3092             c1 = (*i_getc)(f);
3093             if (c1 == EOF){
3094                 c3 = EOF;
3095                 break;
3096             }
3097             code_status(c1);
3098         }
3099         c0 = 0;
3100         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3101         case -2:
3102             /* 4 bytes UTF-8 */
3103             if (hold_index < hold_count){
3104                 c0 = hold_buf[hold_index++];
3105             } else if ((c0 = (*i_getc)(f)) == EOF) {
3106                 ret = EOF;
3107                 break;
3108             } else {
3109                 code_status(c0);
3110                 c0 <<= 8;
3111                 if (hold_index < hold_count){
3112                     c3 = hold_buf[hold_index++];
3113                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3114                     c0 = ret = EOF;
3115                     break;
3116                 } else {
3117                     code_status(c3);
3118                     (*iconv)(c2, c1, c0|c3);
3119                 }
3120             }
3121             break;
3122         case -1:
3123             /* 3 bytes EUC or UTF-8 */
3124             if (hold_index < hold_count){
3125                 c0 = hold_buf[hold_index++];
3126             } else if ((c0 = (*i_getc)(f)) == EOF) {
3127                 ret = EOF;
3128                 break;
3129             } else {
3130                 code_status(c0);
3131             }
3132             (*iconv)(c2, c1, c0);
3133             break;
3134         }
3135         if (c0 == EOF) break;
3136     }
3137     return ret;
3138 }
3139
3140 nkf_char push_hold_buf(nkf_char c2)
3141 {
3142     if (hold_count >= HOLD_SIZE*2)
3143         return (EOF);
3144     hold_buf[hold_count++] = (unsigned char)c2;
3145     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3146 }
3147
3148 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3149 {
3150 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3151     nkf_char val;
3152 #endif
3153     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3154 #ifdef SHIFTJIS_CP932
3155     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3156         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3157         if (val){
3158             c2 = val >> 8;
3159             c1 = val & 0xff;
3160         }
3161     }
3162     if (cp932inv_f
3163         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3164         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3165         if (c){
3166             c2 = c >> 8;
3167             c1 = c & 0xff;
3168         }
3169     }
3170 #endif /* SHIFTJIS_CP932 */
3171 #ifdef X0212_ENABLE
3172     if (!x0213_f && is_ibmext_in_sjis(c2)){
3173         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3174         if (val){
3175             if (val > 0x7FFF){
3176                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3177                 c1 = val & 0xff;
3178             }else{
3179                 c2 = val >> 8;
3180                 c1 = val & 0xff;
3181             }
3182             if (p2) *p2 = c2;
3183             if (p1) *p1 = c1;
3184             return 0;
3185         }
3186     }
3187 #endif
3188     if(c2 >= 0x80){
3189         if(x0213_f && c2 >= 0xF0){
3190             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3191                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3192             }else{ /* 78<=k<=94 */
3193                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3194                 if (0x9E < c1) c2++;
3195             }
3196         }else{
3197             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3198             if (0x9E < c1) c2++;
3199         }
3200         if (c1 < 0x9F)
3201             c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
3202         else {
3203             c1 = c1 - 0x7E;
3204         }
3205     }
3206
3207 #ifdef X0212_ENABLE
3208     c2 = x0212_unshift(c2);
3209 #endif
3210     if (p2) *p2 = c2;
3211     if (p1) *p1 = c1;
3212     return 0;
3213 }
3214
3215 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3216 {
3217     if (c2 == X0201) {
3218         c1 &= 0x7f;
3219     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3220         /* NOP */
3221     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3222         /* CP932 UDC */
3223         if(c1 == 0x7F) return 0;
3224         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3225         c2 = 0;
3226     } else {
3227         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3228         if (ret) return ret;
3229     }
3230     (*oconv)(c2, c1);
3231     return 0;
3232 }
3233
3234 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3235 {
3236     if (c2 == X0201) {
3237         c1 &= 0x7f;
3238 #ifdef X0212_ENABLE
3239     }else if (c2 == 0x8f){
3240         if (c0 == 0){
3241             return -1;
3242         }
3243         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3244             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3245             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3246             c2 = 0;
3247         } else {
3248             c2 = (c2 << 8) | (c1 & 0x7f);
3249             c1 = c0 & 0x7f;
3250 #ifdef SHIFTJIS_CP932
3251             if (cp51932_f){
3252                 nkf_char s2, s1;
3253                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3254                     s2e_conv(s2, s1, &c2, &c1);
3255                     if (c2 < 0x100){
3256                         c1 &= 0x7f;
3257                         c2 &= 0x7f;
3258                     }
3259                 }
3260             }
3261 #endif /* SHIFTJIS_CP932 */
3262         }
3263 #endif /* X0212_ENABLE */
3264     } else if (c2 == SSO){
3265         c2 = X0201;
3266         c1 &= 0x7f;
3267     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3268         /* NOP */
3269     } else {
3270         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3271             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3272             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3273             c2 = 0;
3274         } else {
3275             c1 &= 0x7f;
3276             c2 &= 0x7f;
3277 #ifdef SHIFTJIS_CP932
3278             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3279                 nkf_char s2, s1;
3280                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3281                     s2e_conv(s2, s1, &c2, &c1);
3282                     if (c2 < 0x100){
3283                         c1 &= 0x7f;
3284                         c2 &= 0x7f;
3285                     }
3286                 }
3287             }
3288 #endif /* SHIFTJIS_CP932 */
3289         }
3290     }
3291     (*oconv)(c2, c1);
3292     return 0;
3293 }
3294
3295 #ifdef UTF8_INPUT_ENABLE
3296 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3297 {
3298     nkf_char ret = 0;
3299
3300     if (!c1){
3301         *p2 = 0;
3302         *p1 = c2;
3303     }else if (0xc0 <= c2 && c2 <= 0xef) {
3304         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3305 #ifdef NUMCHAR_OPTION
3306         if (ret > 0){
3307             if (p2) *p2 = 0;
3308             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3309             ret = 0;
3310         }
3311 #endif
3312     }
3313     return ret;
3314 }
3315
3316 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3317 {
3318     nkf_char ret = 0;
3319     static const char w_iconv_utf8_1st_byte[] =
3320     { /* 0xC0 - 0xFF */
3321         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3322         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3323         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3324         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3325     
3326     if (c2 < 0 || 0xff < c2) {
3327     }else if (c2 == 0) { /* 0 : 1 byte*/
3328         c0 = 0;
3329     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3330         return 0;
3331     } else{
3332         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3333         case 21:
3334             if (c1 < 0x80 || 0xBF < c1) return 0;
3335             break;
3336         case 30:
3337             if (c0 == 0) return -1;
3338             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3339                 return 0;
3340             break;
3341         case 31:
3342         case 33:
3343             if (c0 == 0) return -1;
3344             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3345                 return 0;
3346             break;
3347         case 32:
3348             if (c0 == 0) return -1;
3349             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3350                 return 0;
3351             break;
3352         case 40:
3353             if (c0 == 0) return -2;
3354             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3355                 return 0;
3356             break;
3357         case 41:
3358             if (c0 == 0) return -2;
3359             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3360                 return 0;
3361             break;
3362         case 42:
3363             if (c0 == 0) return -2;
3364             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3365                 return 0;
3366             break;
3367         default:
3368             return 0;
3369             break;
3370         }
3371     }
3372     if (c2 == 0 || c2 == EOF){
3373     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3374         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3375         c2 = 0;
3376     } else {
3377         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3378     }
3379     if (ret == 0){
3380         (*oconv)(c2, c1);
3381     }
3382     return ret;
3383 }
3384 #endif
3385
3386 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3387 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3388 {
3389     val &= VALUE_MASK;
3390     if (val < 0x80){
3391         *p2 = val;
3392         *p1 = 0;
3393         *p0 = 0;
3394     }else if (val < 0x800){
3395         *p2 = 0xc0 | (val >> 6);
3396         *p1 = 0x80 | (val & 0x3f);
3397         *p0 = 0;
3398     } else if (val <= NKF_INT32_C(0xFFFF)) {
3399         *p2 = 0xe0 | (val >> 12);
3400         *p1 = 0x80 | ((val >> 6) & 0x3f);
3401         *p0 = 0x80 | (val        & 0x3f);
3402     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3403         *p2 = 0xe0 |  (val >> 16);
3404         *p1 = 0x80 | ((val >> 12) & 0x3f);
3405         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3406     } else {
3407         *p2 = 0;
3408         *p1 = 0;
3409         *p0 = 0;
3410     }
3411 }
3412 #endif
3413
3414 #ifdef UTF8_INPUT_ENABLE
3415 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3416 {
3417     nkf_char val;
3418     if (c2 >= 0xf8) {
3419         val = -1;
3420     } else if (c2 >= 0xf0){
3421         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3422         val = (c2 & 0x0f) << 18;
3423         val |= (c1 & 0x3f) << 12;
3424         val |= (c0 & 0x3f00) >> 2;
3425         val |= (c0 & 0x3f);
3426     }else if (c2 >= 0xe0){
3427         val = (c2 & 0x0f) << 12;
3428         val |= (c1 & 0x3f) << 6;
3429         val |= (c0 & 0x3f);
3430     }else if (c2 >= 0xc0){
3431         val = (c2 & 0x1f) << 6;
3432         val |= (c1 & 0x3f);
3433     }else{
3434         val = c2;
3435     }
3436     return val;
3437 }
3438
3439 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3440 {
3441     nkf_char c2, c1, c0;
3442     nkf_char ret = 0;
3443     val &= VALUE_MASK;
3444     if (val < 0x80){
3445         *p2 = 0;
3446         *p1 = val;
3447     }else{
3448         w16w_conv(val, &c2, &c1, &c0);
3449         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3450 #ifdef NUMCHAR_OPTION
3451         if (ret > 0){
3452             *p2 = 0;
3453             *p1 = CLASS_UNICODE | val;
3454             ret = 0;
3455         }
3456 #endif
3457     }
3458     return ret;
3459 }
3460 #endif
3461
3462 #ifdef UTF8_INPUT_ENABLE
3463 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3464 {
3465     nkf_char ret = 0;
3466     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3467         (*oconv)(c2, c1);
3468         return 0;
3469     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3470         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3471             return -2;
3472         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3473         c2 = 0;
3474     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3475         /*
3476            return 2;
3477         */
3478         return 1;
3479     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3480     if (ret) return ret;
3481     (*oconv)(c2, c1);
3482     return 0;
3483 }
3484
3485 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3486 {
3487     int ret = 0;
3488
3489     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3490     } else if (is_unicode_bmp(c1)) {
3491         ret = w16e_conv(c1, &c2, &c1);
3492     } else {
3493         c2 = 0;
3494         c1 =  CLASS_UNICODE | c1;
3495     }
3496     if (ret) return ret;
3497     (*oconv)(c2, c1);
3498     return 0;
3499 }
3500
3501 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3502 {
3503     const unsigned short *const *pp;
3504     const unsigned short *const *const *ppp;
3505     static const char no_best_fit_chars_table_C2[] =
3506     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3507         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3508         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3509         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3510     static const char no_best_fit_chars_table_C2_ms[] =
3511     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3512         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3513         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3514         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3515     static const char no_best_fit_chars_table_932_C2[] =
3516     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3517         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3518         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3519         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3520     static const char no_best_fit_chars_table_932_C3[] =
3521     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3522         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3523         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3524         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3525     nkf_char ret = 0;
3526
3527     if(c2 < 0x80){
3528         *p2 = 0;
3529         *p1 = c2;
3530     }else if(c2 < 0xe0){
3531         if(no_best_fit_chars_f){
3532             if(ms_ucs_map_f == UCS_MAP_CP932){
3533                 switch(c2){
3534                 case 0xC2:
3535                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3536                     break;
3537                 case 0xC3:
3538                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3539                     break;
3540                 }
3541             }else if(!cp932inv_f){
3542                 switch(c2){
3543                 case 0xC2:
3544                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3545                     break;
3546                 case 0xC3:
3547                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3548                     break;
3549                 }
3550             }else if(ms_ucs_map_f == UCS_MAP_MS){
3551                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3552             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3553                 switch(c2){
3554                 case 0xC2:
3555                     switch(c1){
3556        &n