OSDN Git Service

c8c50b4dd50e232e30bdfcd1bd8a85ecd482c6ac
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B 
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program 
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.  
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30 ** UTF-8 \e$B%5%]!<%H$K$D$$$F\e(B
31 **    \e$B=>Mh$N\e(B nkf \e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9\e(B
32 **    nkf -e \e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G\e(B UTF-8 \e$B$HH=Dj$5$l$l$P!"\e(B
33 **    \e$B$=$N$^$^\e(B euc-jp \e$B$KJQ49$5$l$^$9\e(B
34 **
35 **    \e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#\e(B
36 **    (\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O\e(B)
37 **
38 **    \e$B2?$+LdBj$r8+$D$1$?$i!"\e(B
39 **        E-Mail: furukawa@tcp-ip.or.jp
40 **    \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.129 2007/08/30 06:02:28 naruse Exp $ */
43 #define NKF_VERSION "2.0.8"
44 #define NKF_RELEASE_DATE "2007-08-30"
45 #include "config.h"
46 #include "utf8tbl.h"
47
48 #define COPY_RIGHT \
49     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
51
52
53 /*
54 **
55 **
56 **
57 ** USAGE:       nkf [flags] [file] 
58 **
59 ** Flags:
60 ** b    Output is buffered             (DEFAULT)
61 ** u    Output is unbuffered
62 **
63 ** t    no operation
64 **
65 ** j    Output code is JIS 7 bit        (DEFAULT SELECT) 
66 ** s    Output code is MS Kanji         (DEFAULT SELECT) 
67 ** e    Output code is AT&T JIS         (DEFAULT SELECT) 
68 ** w    Output code is UTF-8            (DEFAULT SELECT) 
69 ** l    Output code is JIS 7bit and ISO8859-1 Latin-1
70 **
71 ** m    MIME conversion for ISO-2022-JP
72 ** I    Convert non ISO-2022-JP characters to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M    MIME output conversion 
76 **
77 ** r  {de/en}crypt ROT13/47
78 **
79 ** v  display Version
80 **
81 ** T  Text mode output        (for MS-DOS)
82 **
83 ** x    Do not convert X0201 kana into X0208
84 ** Z    Convert X0208 alphabet to ASCII
85 **
86 ** f60  fold option
87 **
88 ** m    MIME decode
89 ** B    try to fix broken JIS, missing Escape
90 ** B[1-9]  broken level
91 **
92 ** O   Output to 'nkf.out' file or last file name
93 ** d   Delete \r in line feed 
94 ** c   Add \r in line feed 
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
97 **
98 **/
99
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
101 #define MSDOS
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
103 #define __WIN32__
104 #endif
105 #endif
106
107 #ifdef PERL_XS
108 #undef OVERWRITE
109 #endif
110
111 #ifndef PERL_XS
112 #include <stdio.h>
113 #endif
114
115 #include <stdlib.h>
116 #include <string.h>
117
118 #if defined(MSDOS) || defined(__OS2__)
119 #include <fcntl.h>
120 #include <io.h>
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
123 #endif
124 #endif
125
126 #ifdef MSDOS
127 #ifdef LSI_C
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
134 #endif
135 #else /* UNIX */
136 #define setbinmode(fp)
137 #endif
138
139 #if defined(__DJGPP__)
140 void  djgpp_setbinmode(FILE *fp)
141 {
142     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
143     int fd, m;
144     fd = fileno(fp);
145     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146     __file_handle_set(fd, m);
147 }
148 #endif
149
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
152 #else /* BSD */
153 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
154 #endif
155
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
158 #define         EASYWIN
159 #ifndef __WIN16__
160 #define __WIN16__
161 #endif
162 #include <windows.h>
163 #endif
164
165 #ifdef OVERWRITE
166 /* added by satoru@isoternet.org */
167 #if defined(__EMX__)
168 #include <sys/types.h>
169 #endif
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
172 #include <unistd.h>
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
175 #else
176 #include <utime.h>
177 #endif
178 #else /* defined(MSDOS) */
179 #ifdef __WIN32__
180 #ifdef __BORLANDC__ /* BCC32 */
181 #include <utime.h>
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
189 #include <utime.h>
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
192 #endif
193 #endif
194 #endif
195
196 #define         FALSE   0
197 #define         TRUE    1
198
199 /* state of output_mode and input_mode  
200
201    c2           0 means ASCII
202                 X0201
203                 ISO8859_1
204                 X0208
205                 EOF      all termination
206    c1           32bit data
207
208  */
209
210 #define         ASCII           0
211 #define         X0208           1
212 #define         X0201           2
213 #define         ISO8859_1       8
214 #define         NO_X0201        3
215 #define         X0212      0x2844
216 #define         X0213_1    0x284F
217 #define         X0213_2    0x2850
218
219 /* Input Assumption */
220
221 #define         JIS_INPUT       4
222 #define         EUC_INPUT      16
223 #define         SJIS_INPUT      5
224 #define         LATIN1_INPUT    6
225 #define         FIXED_MIME      7
226 #define         STRICT_MIME     8
227
228 /* MIME ENCODE */
229
230 #define         ISO2022JP       9
231 #define         JAPANESE_EUC   10
232 #define         SHIFT_JIS      11
233
234 #define         UTF8           12
235 #define         UTF8_INPUT     13
236 #define         UTF16_INPUT    1015
237 #define         UTF32_INPUT    1017
238
239 /* byte order */
240
241 #define         ENDIAN_BIG      1234
242 #define         ENDIAN_LITTLE   4321
243 #define         ENDIAN_2143     2143
244 #define         ENDIAN_3412     3412
245
246 #define         WISH_TRUE      15
247
248 /* ASCII CODE */
249
250 #define         BS      0x08
251 #define         TAB     0x09
252 #define         NL      0x0a
253 #define         CR      0x0d
254 #define         ESC     0x1b
255 #define         SPACE   0x20
256 #define         AT      0x40
257 #define         SSP     0xa0
258 #define         DEL     0x7f
259 #define         SI      0x0f
260 #define         SO      0x0e
261 #define         SSO     0x8e
262 #define         SS3     0x8f
263
/*
 * ASCII character classification and conversion helpers.
 *
 * Every macro argument is parenthesized, so an arbitrary expression such as
 * `x & 0xff` or `a | b` can be passed without operator-precedence surprises.
 * Arguments are still evaluated more than once: never pass an expression
 * with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value (0-15) of the hexadecimal digit c; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* True when c2, truncated to unsigned short and shifted right by 8, equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
282
/* Inclusive byte ranges named after the CP932 and inverse-CP932 tables. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The argument is parenthesized so that any expression may be passed; it is
 * evaluated twice, so it must not have side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
288
289 #define         HOLD_SIZE       1024
290 #if defined(INT_IS_SHORT)
291 #define         IOBUF_SIZE      2048
292 #else
293 #define         IOBUF_SIZE      16384
294 #endif
295
296 #define         DEFAULT_J       'B'
297 #define         DEFAULT_R       'B'
298
299 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
300 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
301
302 #define         RANGE_NUM_MAX   18
303 #define         GETA1   0x22
304 #define         GETA2   0x2e
305
306
307 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
308 #define sizeof_euc_to_utf8_1byte 94
309 #define sizeof_euc_to_utf8_2bytes 94
310 #define sizeof_utf8_to_euc_C2 64
311 #define sizeof_utf8_to_euc_E5B8 64
312 #define sizeof_utf8_to_euc_2bytes 112
313 #define sizeof_utf8_to_euc_3bytes 16
314 #endif
315
316 /* MIME preprocessor */
317
318 #ifdef EASYWIN /*Easy Win */
319 extern POINT _BufferSize;
320 #endif
321
322 struct input_code{  /* one entry of input_code_list: an input encoding plus its per-encoding callbacks */
323     char *name;  /* encoding label, e.g. "EUC-JP"; NULL in the terminating entry of input_code_list */
324     nkf_char stat;  /* NOTE(review): presumably the state kept for status_func -- confirm */
325     nkf_char score;  /* NOTE(review): presumably the value handled by set_code_score()/clr_code_score() -- confirm */
326     nkf_char index;  /* NOTE(review): presumably the number of entries used in buf -- confirm */
327     nkf_char buf[3];  /* NOTE(review): presumably characters stored by status_push_ch() -- confirm */
328     void (*status_func)(struct input_code *, nkf_char);  /* per-encoding status callback; NULL for the UTF-16/UTF-32 entries */
329     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);  /* per-encoding input converter (e_iconv, s_iconv, w_iconv, ...) */
330     int _file_stat;  /* NOTE(review): purpose not visible here; initialised to 0 in input_code_list */
331 };
332
333 static char *input_codename = "";
334
335 #ifndef PERL_XS
336 static const char *CopyRight = COPY_RIGHT;
337 #endif
338 #if !defined(PERL_XS) && !defined(WIN32DLL)
339 static  nkf_char     noconvert(FILE *f);
340 #endif
341 static  void    module_connection(void);
342 static  nkf_char     kanji_convert(FILE *f);
343 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
344 static  nkf_char     push_hold_buf(nkf_char c2);
345 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
346 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
347 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
348 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
349 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
350 /* UCS Mapping
351  * 0: Shift_JIS, eucJP-ascii
352  * 1: eucJP-ms
353  * 2: CP932, CP51932
354  * 3: CP10001
355  */
356 #define UCS_MAP_ASCII   0
357 #define UCS_MAP_MS      1
358 #define UCS_MAP_CP932   2
359 #define UCS_MAP_CP10001 3
360 static int ms_ucs_map_f = UCS_MAP_ASCII;
361 #endif
362 #ifdef UTF8_INPUT_ENABLE
363 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
364 static  int     no_cp932ext_f = FALSE;
365 /* ignore ZERO WIDTH NO-BREAK SPACE */
366 static  int     no_best_fit_chars_f = FALSE;
367 static  int     input_endian = ENDIAN_BIG;
368 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
369 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
370 static  void    encode_fallback_html(nkf_char c);
371 static  void    encode_fallback_xml(nkf_char c);
372 static  void    encode_fallback_java(nkf_char c);
373 static  void    encode_fallback_perl(nkf_char c);
374 static  void    encode_fallback_subchar(nkf_char c);
375 static  void    (*encode_fallback)(nkf_char c) = NULL;
376 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
377 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
378 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
379 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
380 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
381 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
382 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
383 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
384 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
385 static  void    w_status(struct input_code *, nkf_char);
386 #endif
387 #ifdef UTF8_OUTPUT_ENABLE
388 static  int     output_bom_f = FALSE;
389 static  int     output_endian = ENDIAN_BIG;
390 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
391 static  void    w_oconv(nkf_char c2,nkf_char c1);
392 static  void    w_oconv16(nkf_char c2,nkf_char c1);
393 static  void    w_oconv32(nkf_char c2,nkf_char c1);
394 #endif
395 static  void    e_oconv(nkf_char c2,nkf_char c1);
396 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
397 static  void    s_oconv(nkf_char c2,nkf_char c1);
398 static  void    j_oconv(nkf_char c2,nkf_char c1);
399 static  void    fold_conv(nkf_char c2,nkf_char c1);
400 static  void    cr_conv(nkf_char c2,nkf_char c1);
401 static  void    z_conv(nkf_char c2,nkf_char c1);
402 static  void    rot_conv(nkf_char c2,nkf_char c1);
403 static  void    hira_conv(nkf_char c2,nkf_char c1);
404 static  void    base64_conv(nkf_char c2,nkf_char c1);
405 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
406 static  void    no_connection(nkf_char c2,nkf_char c1);
407 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
408
409 static  void    code_score(struct input_code *ptr);
410 static  void    code_status(nkf_char c);
411
412 static  void    std_putc(nkf_char c);
413 static  nkf_char     std_getc(FILE *f);
414 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
415
416 static  nkf_char     broken_getc(FILE *f);
417 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
418
419 static  nkf_char     mime_begin(FILE *f);
420 static  nkf_char     mime_getc(FILE *f);
421 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
422
423 static  void    switch_mime_getc(void);
424 static  void    unswitch_mime_getc(void);
425 static  nkf_char     mime_begin_strict(FILE *f);
426 static  nkf_char     mime_getc_buf(FILE *f);
427 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
428 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
429
430 static  nkf_char     base64decode(nkf_char c);
431 static  void    mime_prechar(nkf_char c2, nkf_char c1);
432 static  void    mime_putc(nkf_char c);
433 static  void    open_mime(nkf_char c);
434 static  void    close_mime(void);
435 static  void    eof_mime(void);
436 static  void    mimeout_addchar(nkf_char c);
437 #ifndef PERL_XS
438 static  void    usage(void);
439 static  void    version(void);
440 #endif
441 static  void    options(unsigned char *c);
442 #if defined(PERL_XS) || defined(WIN32DLL)
443 static  void    reinit(void);
444 #endif
445
446 /* buffers */
447
448 #if !defined(PERL_XS) && !defined(WIN32DLL)
449 static unsigned char   stdibuf[IOBUF_SIZE];
450 static unsigned char   stdobuf[IOBUF_SIZE];
451 #endif
452 static unsigned char   hold_buf[HOLD_SIZE*2];
453 static int             hold_count = 0;
454
455 /* MIME preprocessor fifo */
456
457 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
458 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)   
459 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
460 static unsigned char           mime_buf[MIME_BUF_SIZE];
461 static unsigned int            mime_top = 0;
462 static unsigned int            mime_last = 0;  /* decoded */
463 static unsigned int            mime_input = 0; /* undecoded */
464 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
465
466 /* flags */
467 static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
468 static int             estab_f = FALSE;
469 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
470 static int             binmode_f = TRUE;       /* binary mode: main() applies setbinmode() to stdin/stdout */
471 static int             rot_f = FALSE;          /* ROT13/47 mode */
472 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
473 static int             input_f = FALSE;        /* non fixed input code  */
474 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
475 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
476 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
477 static int             mimebuf_f = FALSE;      /* MIME buffered input */
478 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
479 static int             iso8859_f = FALSE;      /* ISO8859 through */
480 static int             mimeout_f = FALSE;       /* base64 mode */
481 #if defined(MSDOS) || defined(__OS2__) 
482 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
483 #else
484 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
485 #endif
486 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
487
488 #ifdef UNICODE_NORMALIZATION
489 static int nfc_f = FALSE;
490 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
491 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
492 static nkf_char nfc_getc(FILE *f);
493 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
494 #endif
495
496 #ifdef INPUT_OPTION
497 static int cap_f = FALSE;
498 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
499 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
500 static nkf_char cap_getc(FILE *f);
501 static nkf_char cap_ungetc(nkf_char c,FILE *f);
502
503 static int url_f = FALSE;
504 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
505 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
506 static nkf_char url_getc(FILE *f);
507 static nkf_char url_ungetc(nkf_char c,FILE *f);
508 #endif
509
/*
 * NKF_INT32_C(n) writes the integer constant n; when INT_IS_SHORT is defined
 * an `L` suffix is appended so the constant is a long.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The argument of both predicates is parenthesized so that expressions such
 * as `CLASS_UNICODE | code` are masked as a whole.
 */
/* True when the bits of c selected by CLASS_MASK equal CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the bits of c selected by VALUE_MASK are at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
522
523 #ifdef NUMCHAR_OPTION
524 static int numchar_f = FALSE;
525 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
526 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
527 static nkf_char numchar_getc(FILE *f);
528 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
529 #endif
530
531 #ifdef CHECK_OPTION
532 static int noout_f = FALSE;
533 static void no_putc(nkf_char c);
534 static nkf_char debug_f = FALSE;
535 static void debug(const char *str);
536 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
537 #endif
538
539 static int guess_f = FALSE;
540 #if !defined PERL_XS
541 static  void    print_guessed_code(char *filename);
542 #endif
543 static  void    set_input_codename(char *codename);
544 static int is_inputcode_mixed = FALSE;
545 static int is_inputcode_set   = FALSE;
546
547 #ifdef EXEC_IO
548 static int exec_f = 0;
549 #endif
550
551 #ifdef SHIFTJIS_CP932
552 /* invert IBM extended characters to others */
553 static int cp51932_f = FALSE;
554
555 /* invert NEC-selected IBM extended characters to IBM extended characters */
556 static int cp932inv_f = TRUE;
557
558 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
559 #endif /* SHIFTJIS_CP932 */
560
561 #ifdef X0212_ENABLE
562 static int x0212_f = FALSE;
563 static nkf_char x0212_shift(nkf_char c);
564 static nkf_char x0212_unshift(nkf_char c);
565 #endif
566 static int x0213_f = FALSE;
567
568 static unsigned char prefix_table[256];
569
570 static void set_code_score(struct input_code *ptr, nkf_char score);
571 static void clr_code_score(struct input_code *ptr, nkf_char score);
572 static void status_disable(struct input_code *ptr);
573 static void status_push_ch(struct input_code *ptr, nkf_char c);
574 static void status_clear(struct input_code *ptr);
575 static void status_reset(struct input_code *ptr);
576 static void status_reinit(struct input_code *ptr);
577 static void status_check(struct input_code *ptr, nkf_char c);
578 static void e_status(struct input_code *, nkf_char);
579 static void s_status(struct input_code *, nkf_char);
580
581 struct input_code input_code_list[] = {  /* fields: name, stat, score, index, buf, status_func, iconv_func, _file_stat */
582     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
583     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
584 #ifdef UTF8_INPUT_ENABLE  /* the Unicode entries exist only when UTF-8 input support is compiled in */
585     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
586     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},  /* no status_func */
587     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},  /* no status_func */
588 #endif
589     {0}  /* all-zero terminator: name is NULL */
590 };
591
592 static int              mimeout_mode = 0;
593 static int              base64_count = 0;
594
595 /* X0208 -> ASCII converter */
596
597 /* fold parameter */
598 static int             f_line = 0;    /* chars in line */
599 static int             f_prev = 0;
600 static int             fold_preserve_f = FALSE; /* preserve new lines */
601 static int             fold_f  = FALSE;
602 static int             fold_len  = 0;
603
604 /* options */
605 static unsigned char   kanji_intro = DEFAULT_J;
606 static unsigned char   ascii_intro = DEFAULT_R;
607
608 /* Folding */
609
610 #define FOLD_MARGIN  10
611 #define DEFAULT_FOLD 60
612
613 static int             fold_margin  = FOLD_MARGIN;
614
615 /* converters */
616
617 #ifdef DEFAULT_CODE_JIS
618 #   define  DEFAULT_CONV j_oconv
619 #endif
620 #ifdef DEFAULT_CODE_SJIS
621 #   define  DEFAULT_CONV s_oconv
622 #endif
623 #ifdef DEFAULT_CODE_EUC
624 #   define  DEFAULT_CONV e_oconv
625 #endif
626 #ifdef DEFAULT_CODE_UTF8
627 #   define  DEFAULT_CONV w_oconv
628 #endif
629
630 /* process default */
631 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
632
633 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
634 /* s_iconv or oconv */
635 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
636
637 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
638 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
639 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
640 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
641 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
642 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
643 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
644
645 /* static redirections */
646
647 static  void   (*o_putc)(nkf_char c) = std_putc;
648
649 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
650 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
651
652 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
653 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
654
655 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
656
657 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
658 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
659
660 /* for strict mime */
661 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
662 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
663
664 /* Global states */
665 static int output_mode = ASCII,    /* output kanji mode */
666            input_mode =  ASCII,    /* input kanji mode */
667            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
668 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
669
670 /* X0201 / X0208 conversion tables */
671
672 /* X0201 kana conversion table */
673 /* 90-9F A0-DF */
674 static const
675 unsigned char cv[]= {
676     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
677     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
678     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
679     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
680     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
681     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
682     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
683     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
684     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
685     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
686     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
687     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
688     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
689     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
690     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
691     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
692     0x00,0x00};
693
694
695 /* X0201 kana conversion table for dakuten */
696 /* 90-9F A0-DF */
697 static const
698 unsigned char dv[]= { 
699     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
700     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
701     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
702     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
703     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
704     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
705     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
706     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
707     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
708     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
709     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
710     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
711     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
714     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715     0x00,0x00};
716
717 /* X0201 kana conversion table for han-dakuten */
718 /* 90-9F A0-DF */
719 static const
720 unsigned char ev[]= { 
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
724     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
725     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
730     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
731     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
732     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
733     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
734     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
735     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
736     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
737     0x00,0x00};
738
739
740 /* X0208 kigou conversion table */
741 /* 0x8140 - 0x819e */
742 static const
743 unsigned char fv[] = {
744
745     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
746     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
747     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
748     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
749     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
750     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
751     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
752     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
753     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
754     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
755     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
756     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
757 } ;
758
759
760 #define    CRLF      1
761
762 static int             file_out_f = FALSE;
763 #ifdef OVERWRITE
764 static int             overwrite_f = FALSE;
765 static int             preserve_time_f = FALSE;
766 static int             backup_f = FALSE;
767 static char            *backup_suffix = "";
768 static char *get_backup_filename(const char *suffix, const char *filename);
769 #endif
770
771 static int             crmode_f = 0;   /* CR, NL, CRLF */
772 static nkf_char prev_cr = 0;
773 #ifdef EASYWIN /*Easy Win */
774 static int             end_check;
775 #endif /*Easy Win */
776
777 #define STD_GC_BUFSIZE (256)
778 nkf_char std_gc_buf[STD_GC_BUFSIZE];
779 nkf_char std_gc_ndx;
780
781 #ifdef WIN32DLL
782 #include "nkf32dll.c"
783 #elif defined(PERL_XS)
784 #else /* WIN32DLL */
785 int main(int argc, char **argv)
786 {
787     FILE  *fin;
788     unsigned char  *cp;
789
790     char *outfname = NULL;
791     char *origfname;
792
793 #ifdef EASYWIN /*Easy Win */
794     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
795 #endif
796
797     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
798         cp = (unsigned char *)*argv;
799         options(cp);
800 #ifdef EXEC_IO
801         if (exec_f){
802             int fds[2], pid;
803             if (pipe(fds) < 0 || (pid = fork()) < 0){
804                 abort();
805             }
806             if (pid == 0){
807                 if (exec_f > 0){
808                     close(fds[0]);
809                     dup2(fds[1], 1);
810                 }else{
811                     close(fds[1]);
812                     dup2(fds[0], 0);
813                 }
814                 execvp(argv[1], &argv[1]);
815             }
816             if (exec_f > 0){
817                 close(fds[1]);
818                 dup2(fds[0], 0);
819             }else{
820                 close(fds[0]);
821                 dup2(fds[1], 1);
822             }
823             argc = 0;
824             break;
825         }
826 #endif
827     }
828     if(x0201_f == WISH_TRUE)
829          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
830
831     if (binmode_f == TRUE)
832 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
833     if (freopen("","wb",stdout) == NULL) 
834         return (-1);
835 #else
836     setbinmode(stdout);
837 #endif
838
839     if (unbuf_f)
840       setbuf(stdout, (char *) NULL);
841     else
842       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
843
844     if (argc == 0) {
845       if (binmode_f == TRUE)
846 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
847       if (freopen("","rb",stdin) == NULL) return (-1);
848 #else
849       setbinmode(stdin);
850 #endif
851       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
852       if (nop_f)
853           noconvert(stdin);
854       else {
855           kanji_convert(stdin);
856           if (guess_f) print_guessed_code(NULL);
857       }
858     } else {
859       int nfiles = argc;
860         int is_argument_error = FALSE;
861       while (argc--) {
862             is_inputcode_mixed = FALSE;
863             is_inputcode_set   = FALSE;
864             input_codename = "";
865 #ifdef CHECK_OPTION
866             iconv_for_check = 0;
867 #endif
868           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
869               perror(*--argv);
870                 *argv++;
871                 is_argument_error = TRUE;
872                 continue;
873           } else {
874 #ifdef OVERWRITE
875               int fd = 0;
876               int fd_backup = 0;
877 #endif
878
879 /* reopen file for stdout */
880               if (file_out_f == TRUE) {
881 #ifdef OVERWRITE
882                   if (overwrite_f){
883                       outfname = malloc(strlen(origfname)
884                                         + strlen(".nkftmpXXXXXX")
885                                         + 1);
886                       if (!outfname){
887                           perror(origfname);
888                           return -1;
889                       }
890                       strcpy(outfname, origfname);
891 #ifdef MSDOS
892                       {
893                           int i;
894                           for (i = strlen(outfname); i; --i){
895                               if (outfname[i - 1] == '/'
896                                   || outfname[i - 1] == '\\'){
897                                   break;
898                               }
899                           }
900                           outfname[i] = '\0';
901                       }
902                       strcat(outfname, "ntXXXXXX");
903                       mktemp(outfname);
904                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
905                                 S_IREAD | S_IWRITE);
906 #else
907                       strcat(outfname, ".nkftmpXXXXXX");
908                       fd = mkstemp(outfname);
909 #endif
910                       if (fd < 0
911                           || (fd_backup = dup(fileno(stdout))) < 0
912                           || dup2(fd, fileno(stdout)) < 0
913                           ){
914                           perror(origfname);
915                           return -1;
916                       }
917                   }else
918 #endif
919                   if(argc == 1 ) {
920                       outfname = *argv++;
921                       argc--;
922                   } else {
923                       outfname = "nkf.out";
924                   }
925
926                   if(freopen(outfname, "w", stdout) == NULL) {
927                       perror (outfname);
928                       return (-1);
929                   }
930                   if (binmode_f == TRUE) {
931 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
932                       if (freopen("","wb",stdout) == NULL) 
933                            return (-1);
934 #else
935                       setbinmode(stdout);
936 #endif
937                   }
938               }
939               if (binmode_f == TRUE)
940 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
941                  if (freopen("","rb",fin) == NULL) 
942                     return (-1);
943 #else
944                  setbinmode(fin);
945 #endif 
946               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
947               if (nop_f)
948                   noconvert(fin);
949               else {
950                   char *filename = NULL;
951                   kanji_convert(fin);
952                   if (nfiles > 1) filename = origfname;
953                   if (guess_f) print_guessed_code(filename);
954               }
955               fclose(fin);
956 #ifdef OVERWRITE
957               if (overwrite_f) {
958                   struct stat     sb;
959 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
960                   time_t tb[2];
961 #else
962                   struct utimbuf  tb;
963 #endif
964
965                   fflush(stdout);
966                   close(fd);
967                   if (dup2(fd_backup, fileno(stdout)) < 0){
968                       perror("dup2");
969                   }
970                   if (stat(origfname, &sb)) {
971                       fprintf(stderr, "Can't stat %s\n", origfname);
972                   }
973                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
974                   if (chmod(outfname, sb.st_mode)) {
975                       fprintf(stderr, "Can't set permission %s\n", outfname);
976                   }
977
978                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
979                     if(preserve_time_f){
980 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
981                         tb[0] = tb[1] = sb.st_mtime;
982                         if (utime(outfname, tb)) {
983                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
984                         }
985 #else
986                         tb.actime  = sb.st_atime;
987                         tb.modtime = sb.st_mtime;
988                         if (utime(outfname, &tb)) {
989                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
990                         }
991 #endif
992                     }
993                     if(backup_f){
994                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
995 #ifdef MSDOS
996                         unlink(backup_filename);
997 #endif
998                         if (rename(origfname, backup_filename)) {
999                             perror(backup_filename);
1000                             fprintf(stderr, "Can't rename %s to %s\n",
1001                                     origfname, backup_filename);
1002                         }
1003                     }else{
1004 #ifdef MSDOS
1005                         if (unlink(origfname)){
1006                             perror(origfname);
1007                         }
1008 #endif
1009                     }
1010                   if (rename(outfname, origfname)) {
1011                       perror(origfname);
1012                       fprintf(stderr, "Can't rename %s to %s\n",
1013                               outfname, origfname);
1014                   }
1015                   free(outfname);
1016               }
1017 #endif
1018           }
1019       }
1020         if (is_argument_error)
1021             return(-1);
1022     }
1023 #ifdef EASYWIN /*Easy Win */
1024     if (file_out_f == FALSE) 
1025         scanf("%d",&end_check);
1026     else 
1027         fclose(stdout);
1028 #else /* for Other OS */
1029     if (file_out_f == TRUE) 
1030         fclose(stdout);
1031 #endif /*Easy Win */
1032     return (0);
1033 }
1034 #endif /* WIN32DLL */
1035
1036 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the pattern `suffix`.
 *
 * If `suffix` contains one or more '*' characters, every '*' is replaced by
 * `filename` and all other characters are copied unchanged (e.g. "old/*~"
 * and "a.txt" give "old/a.txt~").  Otherwise `suffix` is simply appended to
 * `filename`.
 *
 * Returns a newly malloc'ed, NUL-terminated string that the caller must
 * free(), or NULL (after reporting through perror) when allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t total_length;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* Every '*' disappears and one copy of filename takes its place. */
        total_length = (suffix_length - asterisk_count)
                       + asterisk_count * filename_length;
    } else {
        total_length = filename_length + suffix_length;
    }

    backup_filename = malloc(total_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
    } else {
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
    }
    backup_filename[total_length] = '\0';
    return backup_filename;
}
1075 #endif
1076
/*
 * Table of long ("--name") options, searched in order by options().
 *
 * name  : the text that follows "--".  A trailing '=' marks an option that
 *         carries a value; options() stops matching at the '=' and reads
 *         the value from the characters after it.
 * alias : when non-empty, the short-option letters options() parses in place
 *         of the long option.  When empty, options() handles the option
 *         itself by comparing `name`.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    /* input / output encoding given by name */
    { "ic=",                "" },
    { "oc=",                "" },

    /* plain aliases for short options */
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess",              "g" },

    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/* Set to 1 by options() once a bare "--" has been seen; from then on
 * options() returns immediately and ignores every further argument. */
static int option_mode = 0;
1158
1159 void options(unsigned char *cp)
1160 {
1161     nkf_char i, j;
1162     unsigned char *p;
1163     unsigned char *cp_back = NULL;
1164     char codeset[32];
1165
1166     if (option_mode==1)
1167         return;
1168     while(*cp && *cp++!='-');
1169     while (*cp || cp_back) {
1170         if(!*cp){
1171             cp = cp_back;
1172             cp_back = NULL;
1173             continue;
1174         }
1175         p = 0;
1176         switch (*cp++) {
1177         case '-':  /* literal options */
1178             if (!*cp || *cp == SPACE) {        /* ignore the rest of arguments */
1179                 option_mode = 1;
1180                 return;
1181             }
1182             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1183                 p = (unsigned char *)long_option[i].name;
1184                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1185                 if (*p == cp[j] || cp[j] == ' '){
1186                     p = &cp[j] + 1;
1187                     break;
1188                 }
1189                 p = 0;
1190             }
1191             if (p == 0) return;
1192             while(*cp && *cp != SPACE && cp++);
1193             if (long_option[i].alias[0]){
1194                 cp_back = cp;
1195                 cp = (unsigned char *)long_option[i].alias;
1196             }else{
1197                 if (strcmp(long_option[i].name, "ic=") == 0){
1198                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1199                         codeset[i] = nkf_toupper(p[i]);
1200                     }
1201                     codeset[i] = 0;
1202                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1203                         input_f = JIS_INPUT;
1204                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1205                       strcmp(codeset, "CP50220") == 0 ||
1206                       strcmp(codeset, "CP50221") == 0 ||
1207                       strcmp(codeset, "CP50222") == 0){
1208                         input_f = JIS_INPUT;
1209 #ifdef SHIFTJIS_CP932
1210                         cp51932_f = TRUE;
1211 #endif
1212 #ifdef UTF8_OUTPUT_ENABLE
1213                         ms_ucs_map_f = UCS_MAP_CP932;
1214 #endif
1215                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1216                         input_f = JIS_INPUT;
1217 #ifdef X0212_ENABLE
1218                         x0212_f = TRUE;
1219 #endif
1220                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1221                         input_f = JIS_INPUT;
1222 #ifdef X0212_ENABLE
1223                         x0212_f = TRUE;
1224 #endif
1225                         x0213_f = TRUE;
1226                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1227                         input_f = SJIS_INPUT;
1228                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1229                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1230                              strcmp(codeset, "CP932") == 0 ||
1231                              strcmp(codeset, "MS932") == 0){
1232                         input_f = SJIS_INPUT;
1233 #ifdef SHIFTJIS_CP932
1234                         cp51932_f = TRUE;
1235 #endif
1236 #ifdef UTF8_OUTPUT_ENABLE
1237                         ms_ucs_map_f = UCS_MAP_CP932;
1238 #endif
1239                     }else if(strcmp(codeset, "CP10001") == 0){
1240                         input_f = SJIS_INPUT;
1241 #ifdef SHIFTJIS_CP932
1242                         cp51932_f = TRUE;
1243 #endif
1244 #ifdef UTF8_OUTPUT_ENABLE
1245                         ms_ucs_map_f = UCS_MAP_CP10001;
1246 #endif
1247                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1248                              strcmp(codeset, "EUC-JP") == 0){
1249                         input_f = EUC_INPUT;
1250                     }else if(strcmp(codeset, "CP51932") == 0){
1251                         input_f = EUC_INPUT;
1252 #ifdef SHIFTJIS_CP932
1253                         cp51932_f = TRUE;
1254 #endif
1255 #ifdef UTF8_OUTPUT_ENABLE
1256                         ms_ucs_map_f = UCS_MAP_CP932;
1257 #endif
1258                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1259                              strcmp(codeset, "EUCJP-MS") == 0 ||
1260                              strcmp(codeset, "EUCJPMS") == 0){
1261                         input_f = EUC_INPUT;
1262 #ifdef SHIFTJIS_CP932
1263                         cp51932_f = FALSE;
1264 #endif
1265 #ifdef UTF8_OUTPUT_ENABLE
1266                         ms_ucs_map_f = UCS_MAP_MS;
1267 #endif
1268                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1269                              strcmp(codeset, "EUCJP-ASCII") == 0){
1270                         input_f = EUC_INPUT;
1271 #ifdef SHIFTJIS_CP932
1272                         cp51932_f = FALSE;
1273 #endif
1274 #ifdef UTF8_OUTPUT_ENABLE
1275                         ms_ucs_map_f = UCS_MAP_ASCII;
1276 #endif
1277                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1278                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1279                         input_f = SJIS_INPUT;
1280                         x0213_f = TRUE;
1281 #ifdef SHIFTJIS_CP932
1282                         cp51932_f = FALSE;
1283 #endif
1284                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1285                              strcmp(codeset, "EUC-JIS-2004") == 0){
1286                         input_f = EUC_INPUT;
1287                         x0213_f = TRUE;
1288 #ifdef SHIFTJIS_CP932
1289                         cp51932_f = FALSE;
1290 #endif
1291 #ifdef UTF8_INPUT_ENABLE
1292                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1293                              strcmp(codeset, "UTF-8N") == 0 ||
1294                              strcmp(codeset, "UTF-8-BOM") == 0){
1295                         input_f = UTF8_INPUT;
1296 #ifdef UNICODE_NORMALIZATION
1297                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1298                              strcmp(codeset, "UTF-8-MAC") == 0){
1299                         input_f = UTF8_INPUT;
1300                         nfc_f = TRUE;
1301 #endif
1302                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1303                              strcmp(codeset, "UTF-16BE") == 0 ||
1304                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1305                         input_f = UTF16_INPUT;
1306                         input_endian = ENDIAN_BIG;
1307                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1308                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1309                         input_f = UTF16_INPUT;
1310                         input_endian = ENDIAN_LITTLE;
1311                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1312                              strcmp(codeset, "UTF-32BE") == 0 ||
1313                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1314                         input_f = UTF32_INPUT;
1315                         input_endian = ENDIAN_BIG;
1316                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1317                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1318                         input_f = UTF32_INPUT;
1319                         input_endian = ENDIAN_LITTLE;
1320 #endif
1321                     }
1322                     continue;
1323                 }
1324                 if (strcmp(long_option[i].name, "oc=") == 0){
1325                     x0201_f = FALSE;
1326                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1327                         codeset[i] = nkf_toupper(p[i]);
1328                     }
1329                     codeset[i] = 0;
1330                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1331                         output_conv = j_oconv;
1332                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1333                         output_conv = j_oconv;
1334                         no_cp932ext_f = TRUE;
1335 #ifdef SHIFTJIS_CP932
1336                         cp932inv_f = FALSE;
1337 #endif
1338 #ifdef UTF8_OUTPUT_ENABLE
1339                         ms_ucs_map_f = UCS_MAP_CP932;
1340 #endif
1341                     }else if(strcmp(codeset, "CP50220") == 0){
1342                         output_conv = j_oconv;
1343                         x0201_f = TRUE;
1344 #ifdef SHIFTJIS_CP932
1345                         cp932inv_f = FALSE;
1346 #endif
1347 #ifdef UTF8_OUTPUT_ENABLE
1348                         ms_ucs_map_f = UCS_MAP_CP932;
1349 #endif
1350                     }else if(strcmp(codeset, "CP50221") == 0){
1351                         output_conv = j_oconv;
1352 #ifdef SHIFTJIS_CP932
1353                         cp932inv_f = FALSE;
1354 #endif
1355 #ifdef UTF8_OUTPUT_ENABLE
1356                         ms_ucs_map_f = UCS_MAP_CP932;
1357 #endif
1358                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1359                         output_conv = j_oconv;
1360 #ifdef X0212_ENABLE
1361                         x0212_f = TRUE;
1362 #endif
1363 #ifdef SHIFTJIS_CP932
1364                         cp932inv_f = FALSE;
1365 #endif
1366                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1367                         output_conv = j_oconv;
1368 #ifdef X0212_ENABLE
1369                         x0212_f = TRUE;
1370 #endif
1371                         x0213_f = TRUE;
1372 #ifdef SHIFTJIS_CP932
1373                         cp932inv_f = FALSE;
1374 #endif
1375                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1376                         output_conv = s_oconv;
1377                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1378                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1379                              strcmp(codeset, "CP932") == 0 ||
1380                              strcmp(codeset, "MS932") == 0){
1381                         output_conv = s_oconv;
1382 #ifdef UTF8_OUTPUT_ENABLE
1383                         ms_ucs_map_f = UCS_MAP_CP932;
1384 #endif
1385                     }else if(strcmp(codeset, "CP10001") == 0){
1386                         output_conv = s_oconv;
1387 #ifdef UTF8_OUTPUT_ENABLE
1388                         ms_ucs_map_f = UCS_MAP_CP10001;
1389 #endif
1390                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1391                              strcmp(codeset, "EUC-JP") == 0){
1392                         output_conv = e_oconv;
1393                     }else if(strcmp(codeset, "CP51932") == 0){
1394                         output_conv = e_oconv;
1395 #ifdef SHIFTJIS_CP932
1396                         cp932inv_f = FALSE;
1397 #endif
1398 #ifdef UTF8_OUTPUT_ENABLE
1399                         ms_ucs_map_f = UCS_MAP_CP932;
1400 #endif
1401                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1402                              strcmp(codeset, "EUCJP-MS") == 0 ||
1403                              strcmp(codeset, "EUCJPMS") == 0){
1404                         output_conv = e_oconv;
1405 #ifdef X0212_ENABLE
1406                         x0212_f = TRUE;
1407 #endif
1408 #ifdef UTF8_OUTPUT_ENABLE
1409                         ms_ucs_map_f = UCS_MAP_MS;
1410 #endif
1411                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1412                              strcmp(codeset, "EUCJP-ASCII") == 0){
1413                         output_conv = e_oconv;
1414 #ifdef X0212_ENABLE
1415                         x0212_f = TRUE;
1416 #endif
1417 #ifdef UTF8_OUTPUT_ENABLE
1418                         ms_ucs_map_f = UCS_MAP_ASCII;
1419 #endif
1420                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1421                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1422                         output_conv = s_oconv;
1423                         x0213_f = TRUE;
1424 #ifdef SHIFTJIS_CP932
1425                         cp932inv_f = FALSE;
1426 #endif
1427                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1428                              strcmp(codeset, "EUC-JIS-2004") == 0){
1429                         output_conv = e_oconv;
1430 #ifdef X0212_ENABLE
1431                         x0212_f = TRUE;
1432 #endif
1433                         x0213_f = TRUE;
1434 #ifdef SHIFTJIS_CP932
1435                         cp932inv_f = FALSE;
1436 #endif
1437 #ifdef UTF8_OUTPUT_ENABLE
1438                     }else if(strcmp(codeset, "UTF-8") == 0){
1439                         output_conv = w_oconv;
1440                     }else if(strcmp(codeset, "UTF-8N") == 0){
1441                         output_conv = w_oconv;
1442                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1443                         output_conv = w_oconv;
1444                         output_bom_f = TRUE;
1445                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1446                         output_conv = w_oconv16;
1447                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1448                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1449                         output_conv = w_oconv16;
1450                         output_bom_f = TRUE;
1451                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1452                         output_conv = w_oconv16;
1453                         output_endian = ENDIAN_LITTLE;
1454                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1455                         output_conv = w_oconv16;
1456                         output_endian = ENDIAN_LITTLE;
1457                         output_bom_f = TRUE;
1458                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1459                              strcmp(codeset, "UTF-32BE") == 0){
1460                         output_conv = w_oconv32;
1461                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1462                         output_conv = w_oconv32;
1463                         output_bom_f = TRUE;
1464                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1465                         output_conv = w_oconv32;
1466                         output_endian = ENDIAN_LITTLE;
1467                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1468                         output_conv = w_oconv32;
1469                         output_endian = ENDIAN_LITTLE;
1470                         output_bom_f = TRUE;
1471 #endif
1472                     }
1473                     continue;
1474                 }
1475 #ifdef OVERWRITE
1476                 if (strcmp(long_option[i].name, "overwrite") == 0){
1477                     file_out_f = TRUE;
1478                     overwrite_f = TRUE;
1479                     preserve_time_f = TRUE;
1480                     continue;
1481                 }
1482                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1483                     file_out_f = TRUE;
1484                     overwrite_f = TRUE;
1485                     preserve_time_f = TRUE;
1486                     backup_f = TRUE;
1487                     backup_suffix = malloc(strlen((char *) p) + 1);
1488                     strcpy(backup_suffix, (char *) p);
1489                     continue;
1490                 }
1491                 if (strcmp(long_option[i].name, "in-place") == 0){
1492                     file_out_f = TRUE;
1493                     overwrite_f = TRUE;
1494                     preserve_time_f = FALSE;
1495                     continue;
1496                 }
1497                 if (strcmp(long_option[i].name, "in-place=") == 0){
1498                     file_out_f = TRUE;
1499                     overwrite_f = TRUE;
1500                     preserve_time_f = FALSE;
1501                     backup_f = TRUE;
1502                     backup_suffix = malloc(strlen((char *) p) + 1);
1503                     strcpy(backup_suffix, (char *) p);
1504                     continue;
1505                 }
1506 #endif
1507 #ifdef INPUT_OPTION
1508                 if (strcmp(long_option[i].name, "cap-input") == 0){
1509                     cap_f = TRUE;
1510                     continue;
1511                 }
1512                 if (strcmp(long_option[i].name, "url-input") == 0){
1513                     url_f = TRUE;
1514                     continue;
1515                 }
1516 #endif
1517 #ifdef NUMCHAR_OPTION
1518                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1519                     numchar_f = TRUE;
1520                     continue;
1521                 }
1522 #endif
1523 #ifdef CHECK_OPTION
1524                 if (strcmp(long_option[i].name, "no-output") == 0){
1525                     noout_f = TRUE;
1526                     continue;
1527                 }
1528                 if (strcmp(long_option[i].name, "debug") == 0){
1529                     debug_f = TRUE;
1530                     continue;
1531                 }
1532 #endif
1533                 if (strcmp(long_option[i].name, "cp932") == 0){
1534 #ifdef SHIFTJIS_CP932
1535                     cp51932_f = TRUE;
1536                     cp932inv_f = TRUE;
1537 #endif
1538 #ifdef UTF8_OUTPUT_ENABLE
1539                     ms_ucs_map_f = UCS_MAP_CP932;
1540 #endif
1541                     continue;
1542                 }
1543                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1544 #ifdef SHIFTJIS_CP932
1545                     cp51932_f = FALSE;
1546                     cp932inv_f = FALSE;
1547 #endif
1548 #ifdef UTF8_OUTPUT_ENABLE
1549                     ms_ucs_map_f = UCS_MAP_ASCII;
1550 #endif
1551                     continue;
1552                 }
1553 #ifdef SHIFTJIS_CP932
1554                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1555                     cp932inv_f = TRUE;
1556                     continue;
1557                 }
1558 #endif
1559
1560 #ifdef X0212_ENABLE
1561                 if (strcmp(long_option[i].name, "x0212") == 0){
1562                     x0212_f = TRUE;
1563                     continue;
1564                 }
1565 #endif
1566
1567 #ifdef EXEC_IO
1568                   if (strcmp(long_option[i].name, "exec-in") == 0){
1569                       exec_f = 1;
1570                       return;
1571                   }
1572                   if (strcmp(long_option[i].name, "exec-out") == 0){
1573                       exec_f = -1;
1574                       return;
1575                   }
1576 #endif
1577 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1578                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1579                     no_cp932ext_f = TRUE;
1580                     continue;
1581                 }
1582                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1583                     no_best_fit_chars_f = TRUE;
1584                     continue;
1585                 }
1586                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1587                     encode_fallback = NULL;
1588                     continue;
1589                 }
1590                 if (strcmp(long_option[i].name, "fb-html") == 0){
1591                     encode_fallback = encode_fallback_html;
1592                     continue;
1593                 }
1594                 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1595                     encode_fallback = encode_fallback_xml;
1596                     continue;
1597                 }
1598                 if (strcmp(long_option[i].name, "fb-java") == 0){
1599                     encode_fallback = encode_fallback_java;
1600                     continue;
1601                 }
1602                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1603                     encode_fallback = encode_fallback_perl;
1604                     continue;
1605                 }
1606                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1607                     encode_fallback = encode_fallback_subchar;
1608                     continue;
1609                 }
1610                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1611                     encode_fallback = encode_fallback_subchar;
1612                     unicode_subchar = 0;
1613                     if (p[0] != '0'){
1614                         /* decimal number */
1615                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1616                             unicode_subchar *= 10;
1617                             unicode_subchar += hex2bin(p[i]);
1618                         }
1619                     }else if(p[1] == 'x' || p[1] == 'X'){
1620                         /* hexadecimal number */
1621                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1622                             unicode_subchar <<= 4;
1623                             unicode_subchar |= hex2bin(p[i]);
1624                         }
1625                     }else{
1626                         /* octal number */
1627                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1628                             unicode_subchar *= 8;
1629                             unicode_subchar += hex2bin(p[i]);
1630                         }
1631                     }
1632                     w16e_conv(unicode_subchar, &i, &j);
1633                     unicode_subchar = i<<8 | j;
1634                     continue;
1635                 }
1636 #endif
1637 #ifdef UTF8_OUTPUT_ENABLE
1638                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1639                     ms_ucs_map_f = UCS_MAP_MS;
1640                     continue;
1641                 }
1642 #endif
1643 #ifdef UNICODE_NORMALIZATION
1644                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1645                     input_f = UTF8_INPUT;
1646                     nfc_f = TRUE;
1647                     continue;
1648                 }
1649 #endif
1650                 if (strcmp(long_option[i].name, "prefix=") == 0){
1651                     if (nkf_isgraph(p[0])){
1652                         for (i = 1; nkf_isgraph(p[i]); i++){
1653                             prefix_table[p[i]] = p[0];
1654                         }
1655                     }
1656                     continue;
1657                 }
1658             }
1659             continue;
1660         case 'b':           /* buffered mode */
1661             unbuf_f = FALSE;
1662             continue;
1663         case 'u':           /* non bufferd mode */
1664             unbuf_f = TRUE;
1665             continue;
1666         case 't':           /* transparent mode */
1667             if (*cp=='1') {
1668                 /* alias of -t */
1669                 nop_f = TRUE;
1670                 *cp++;
1671             } else if (*cp=='2') {
1672                 /*
1673                  * -t with put/get
1674                  *
1675                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1676                  *
1677                  */
1678                 nop_f = 2;
1679                 *cp++;
1680             } else
1681                 nop_f = TRUE;
1682             continue;
1683         case 'j':           /* JIS output */
1684         case 'n':
1685             output_conv = j_oconv;
1686             continue;
1687         case 'e':           /* AT&T EUC output */
1688             output_conv = e_oconv;
1689             cp932inv_f = FALSE;
1690             continue;
1691         case 's':           /* SJIS output */
1692             output_conv = s_oconv;
1693             continue;
1694         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1695             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1696             input_f = LATIN1_INPUT;
1697             continue;
1698         case 'i':           /* Kanji IN ESC-$-@/B */
1699             if (*cp=='@'||*cp=='B') 
1700                 kanji_intro = *cp++;
1701             continue;
1702         case 'o':           /* ASCII IN ESC-(-J/B */
1703             if (*cp=='J'||*cp=='B'||*cp=='H') 
1704                 ascii_intro = *cp++;
1705             continue;
1706         case 'h':
1707             /*  
1708                 bit:1   katakana->hiragana
1709                 bit:2   hiragana->katakana
1710             */
1711             if ('9'>= *cp && *cp>='0') 
1712                 hira_f |= (*cp++ -'0');
1713             else 
1714                 hira_f |= 1;
1715             continue;
1716         case 'r':
1717             rot_f = TRUE;
1718             continue;
1719 #if defined(MSDOS) || defined(__OS2__) 
1720         case 'T':
1721             binmode_f = FALSE;
1722             continue;
1723 #endif
1724 #ifndef PERL_XS
1725         case 'V':
1726             version();
1727             exit(1);
1728             break;
1729         case 'v':
1730             usage();
1731             exit(1);
1732             break;
1733 #endif
1734 #ifdef UTF8_OUTPUT_ENABLE
1735         case 'w':           /* UTF-8 output */
1736             if (cp[0] == '8') {
1737                 output_conv = w_oconv; cp++;
1738                 if (cp[0] == '0'){
1739                     cp++;
1740                 } else {
1741                     output_bom_f = TRUE;
1742                 }
1743             } else {
1744                 if ('1'== cp[0] && '6'==cp[1]) {
1745                     output_conv = w_oconv16; cp+=2;
1746                 } else if ('3'== cp[0] && '2'==cp[1]) {
1747                     output_conv = w_oconv32; cp+=2;
1748                 } else {
1749                     output_conv = w_oconv;
1750                     continue;
1751                 }
1752                 if (cp[0]=='L') {
1753                     cp++;
1754                     output_endian = ENDIAN_LITTLE;
1755                 } else if (cp[0] == 'B') {
1756                     cp++;
1757                 } else {
1758                     continue;
1759                 }
1760                 if (cp[0] == '0'){
1761                     cp++;
1762                 } else {
1763                     output_bom_f = TRUE;
1764                 }
1765             }
1766             continue;
1767 #endif
1768 #ifdef UTF8_INPUT_ENABLE
1769         case 'W':           /* UTF input */
1770             if (cp[0] == '8') {
1771                 cp++;
1772                 input_f = UTF8_INPUT;
1773             }else{
1774                 if ('1'== cp[0] && '6'==cp[1]) {
1775                     cp += 2;
1776                     input_f = UTF16_INPUT;
1777                     input_endian = ENDIAN_BIG;
1778                 } else if ('3'== cp[0] && '2'==cp[1]) {
1779                     cp += 2;
1780                     input_f = UTF32_INPUT;
1781                     input_endian = ENDIAN_BIG;
1782                 } else {
1783                     input_f = UTF8_INPUT;
1784                     continue;
1785                 }
1786                 if (cp[0]=='L') {
1787                     cp++;
1788                     input_endian = ENDIAN_LITTLE;
1789                 } else if (cp[0] == 'B') {
1790                     cp++;
1791                 }
1792             }
1793             continue;
1794 #endif
1795         /* Input code assumption */
1796         case 'J':   /* JIS input */
1797             input_f = JIS_INPUT;
1798             continue;
1799         case 'E':   /* AT&T EUC input */
1800             input_f = EUC_INPUT;
1801             continue;
1802         case 'S':   /* MS Kanji input */
1803             input_f = SJIS_INPUT;
1804             if (x0201_f==NO_X0201) x0201_f=TRUE;
1805             continue;
1806         case 'Z':   /* Convert X0208 alphabet to asii */
1807             /*  bit:0   Convert X0208
1808                 bit:1   Convert Kankaku to one space
1809                 bit:2   Convert Kankaku to two spaces
1810                 bit:3   Convert HTML Entity
1811             */
1812             if ('9'>= *cp && *cp>='0') 
1813                 alpha_f |= 1<<(*cp++ -'0');
1814             else 
1815                 alpha_f |= TRUE;
1816             continue;
1817         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1818             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1819             /* accept  X0201
1820                     ESC-(-I     in JIS, EUC, MS Kanji
1821                     SI/SO       in JIS, EUC, MS Kanji
1822                     SSO         in EUC, JIS, not in MS Kanji
1823                     MS Kanji (0xa0-0xdf) 
1824                output  X0201
1825                     ESC-(-I     in JIS (0x20-0x5f)
1826                     SSO         in EUC (0xa0-0xdf)
1827                     0xa0-0xd    in MS Kanji (0xa0-0xdf) 
1828             */
1829             continue;
1830         case 'X':   /* Assume X0201 kana */
1831             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1832             x0201_f = TRUE;
1833             continue;
1834         case 'F':   /* prserve new lines */
1835             fold_preserve_f = TRUE;
1836         case 'f':   /* folding -f60 or -f */
1837             fold_f = TRUE;
1838             fold_len = 0;
1839             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1840                 fold_len *= 10;
1841                 fold_len += *cp++ - '0';
1842             }
1843             if (!(0<fold_len && fold_len<BUFSIZ)) 
1844                 fold_len = DEFAULT_FOLD;
1845             if (*cp=='-') {
1846                 fold_margin = 0;
1847                 cp++;
1848                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1849                     fold_margin *= 10;
1850                     fold_margin += *cp++ - '0';
1851                 }
1852             }
1853             continue;
1854         case 'm':   /* MIME support */
1855             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1856             if (*cp=='B'||*cp=='Q') {
1857                 mime_decode_mode = *cp++;
1858                 mimebuf_f = FIXED_MIME;
1859             } else if (*cp=='N') {
1860                 mime_f = TRUE; cp++;
1861             } else if (*cp=='S') {
1862                 mime_f = STRICT_MIME; cp++;
1863             } else if (*cp=='0') {
1864                 mime_decode_f = FALSE;
1865                 mime_f = FALSE; cp++;
1866             }
1867             continue;
1868         case 'M':   /* MIME output */
1869             if (*cp=='B') {
1870                 mimeout_mode = 'B';
1871                 mimeout_f = FIXED_MIME; cp++;
1872             } else if (*cp=='Q') {
1873                 mimeout_mode = 'Q';
1874                 mimeout_f = FIXED_MIME; cp++;
1875             } else {
1876                 mimeout_f = TRUE;
1877             }
1878             continue;
1879         case 'B':   /* Broken JIS support */
1880             /*  bit:0   no ESC JIS
1881                 bit:1   allow any x on ESC-(-x or ESC-$-x
1882                 bit:2   reset to ascii on NL
1883             */
1884             if ('9'>= *cp && *cp>='0') 
1885                 broken_f |= 1<<(*cp++ -'0');
1886             else 
1887                 broken_f |= TRUE;
1888             continue;
1889 #ifndef PERL_XS
1890         case 'O':/* for Output file */
1891             file_out_f = TRUE;
1892             continue;
1893 #endif
1894         case 'c':/* add cr code */
1895             crmode_f = CRLF;
1896             continue;
1897         case 'd':/* delete cr code */
1898             crmode_f = NL;
1899             continue;
1900         case 'I':   /* ISO-2022-JP output */
1901             iso2022jp_f = TRUE;
1902             continue;
1903         case 'L':  /* line mode */
1904             if (*cp=='u') {         /* unix */
1905                 crmode_f = NL; cp++;
1906             } else if (*cp=='m') { /* mac */
1907                 crmode_f = CR; cp++;
1908             } else if (*cp=='w') { /* windows */
1909                 crmode_f = CRLF; cp++;
1910             } else if (*cp=='0') { /* no conversion  */
1911                 crmode_f = 0; cp++;
1912             }
1913             continue;
1914         case 'g':
1915 #ifndef PERL_XS
1916             guess_f = TRUE;
1917 #endif
1918             continue;
1919         case ' ':    
1920         /* module muliple options in a string are allowed for Perl moudle  */
1921             while(*cp && *cp++!='-');
1922             continue;
1923         default:
1924             /* bogus option but ignored */
1925             continue;
1926         }
1927     }
1928 }
1929
1930 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1931 {
1932     if (iconv_func){
1933         struct input_code *p = input_code_list;
1934         while (p->name){
1935             if (iconv_func == p->iconv_func){
1936                 return p;
1937             }
1938             p++;
1939         }
1940     }
1941     return 0;
1942 }
1943
1944 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1945 {
1946 #ifdef INPUT_CODE_FIX
1947     if (f || !input_f)
1948 #endif
1949         if (estab_f != f){
1950             estab_f = f;
1951         }
1952
1953     if (iconv_func
1954 #ifdef INPUT_CODE_FIX
1955         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1956 #endif
1957         ){
1958         iconv = iconv_func;
1959     }
1960 #ifdef CHECK_OPTION
1961     if (estab_f && iconv_for_check != iconv){
1962         struct input_code *p = find_inputcode_byfunc(iconv);
1963         if (p){
1964             set_input_codename(p->name);
1965             debug(input_codename);
1966         }
1967         iconv_for_check = iconv;
1968     }
1969 #endif
1970 }
1971
/*
 * Bit flags accumulated in input_code.score by set_code_score().
 * Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped via CP932 */
#define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* nonexistent character */
#else
#define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* nonexistent character */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1) /* error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)
1985
/*
 * Score flags used by code_score() when (c2 & 0x70) == 0x20;
 * indexed by the low nibble of c2 (c2 & 0x0f).
 */
const nkf_char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};
1992
/*
 * Score flags used by code_score() when (c2 & 0x70) == 0x70;
 * indexed by the low nibble of c2 (c2 & 0x0f).
 */
const nkf_char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1999
2000 void set_code_score(struct input_code *ptr, nkf_char score)
2001 {
2002     if (ptr){
2003         ptr->score |= score;
2004     }
2005 }
2006
2007 void clr_code_score(struct input_code *ptr, nkf_char score)
2008 {
2009     if (ptr){
2010         ptr->score &= ~score;
2011     }
2012 }
2013
2014 void code_score(struct input_code *ptr)
2015 {
2016     nkf_char c2 = ptr->buf[0];
2017 #ifdef UTF8_OUTPUT_ENABLE
2018     nkf_char c1 = ptr->buf[1];
2019 #endif
2020     if (c2 < 0){
2021         set_code_score(ptr, SCORE_ERROR);
2022     }else if (c2 == SSO){
2023         set_code_score(ptr, SCORE_KANA);
2024 #ifdef UTF8_OUTPUT_ENABLE
2025     }else if (!e2w_conv(c2, c1)){
2026         set_code_score(ptr, SCORE_NO_EXIST);
2027 #endif
2028     }else if ((c2 & 0x70) == 0x20){
2029         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2030     }else if ((c2 & 0x70) == 0x70){
2031         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2032     }else if ((c2 & 0x70) >= 0x50){
2033         set_code_score(ptr, SCORE_L2);
2034     }
2035 }
2036
2037 void status_disable(struct input_code *ptr)
2038 {
2039     ptr->stat = -1;
2040     ptr->buf[0] = -1;
2041     code_score(ptr);
2042     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2043 }
2044
2045 void status_push_ch(struct input_code *ptr, nkf_char c)
2046 {
2047     ptr->buf[ptr->index++] = c;
2048 }
2049
2050 void status_clear(struct input_code *ptr)
2051 {
2052     ptr->stat = 0;
2053     ptr->index = 0;
2054 }
2055
2056 void status_reset(struct input_code *ptr)
2057 {
2058     status_clear(ptr);
2059     ptr->score = SCORE_INIT;
2060 }
2061
2062 void status_reinit(struct input_code *ptr)
2063 {
2064     status_reset(ptr);
2065     ptr->_file_stat = 0;
2066 }
2067
2068 void status_check(struct input_code *ptr, nkf_char c)
2069 {
2070     if (c <= DEL && estab_f){
2071         status_reset(ptr);
2072     }
2073 }
2074
2075 void s_status(struct input_code *ptr, nkf_char c)
2076 {
2077     switch(ptr->stat){
2078       case -1:
2079           status_check(ptr, c);
2080           break;
2081       case 0:
2082           if (c <= DEL){
2083               break;
2084 #ifdef NUMCHAR_OPTION
2085           }else if (is_unicode_capsule(c)){
2086               break;
2087 #endif
2088           }else if (0xa1 <= c && c <= 0xdf){
2089               status_push_ch(ptr, SSO);
2090               status_push_ch(ptr, c);
2091               code_score(ptr);
2092               status_clear(ptr);
2093           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2094               ptr->stat = 1;
2095               status_push_ch(ptr, c);
2096 #ifdef SHIFTJIS_CP932
2097           }else if (cp51932_f
2098                     && is_ibmext_in_sjis(c)){
2099               ptr->stat = 2;
2100               status_push_ch(ptr, c);
2101 #endif /* SHIFTJIS_CP932 */
2102 #ifdef X0212_ENABLE
2103           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2104               ptr->stat = 1;
2105               status_push_ch(ptr, c);
2106 #endif /* X0212_ENABLE */
2107           }else{
2108               status_disable(ptr);
2109           }
2110           break;
2111       case 1:
2112           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2113               status_push_ch(ptr, c);
2114               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2115               code_score(ptr);
2116               status_clear(ptr);
2117           }else{
2118               status_disable(ptr);
2119           }
2120           break;
2121       case 2:
2122 #ifdef SHIFTJIS_CP932
2123           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2124               status_push_ch(ptr, c);
2125               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2126                   set_code_score(ptr, SCORE_CP932);
2127                   status_clear(ptr);
2128                   break;
2129               }
2130           }
2131 #endif /* SHIFTJIS_CP932 */
2132 #ifndef X0212_ENABLE
2133           status_disable(ptr);
2134 #endif
2135           break;
2136     }
2137 }
2138
2139 void e_status(struct input_code *ptr, nkf_char c)
2140 {
2141     switch (ptr->stat){
2142       case -1:
2143           status_check(ptr, c);
2144           break;
2145       case 0:
2146           if (c <= DEL){
2147               break;
2148 #ifdef NUMCHAR_OPTION
2149           }else if (is_unicode_capsule(c)){
2150               break;
2151 #endif
2152           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2153               ptr->stat = 1;
2154               status_push_ch(ptr, c);
2155 #ifdef X0212_ENABLE
2156           }else if (0x8f == c){
2157               ptr->stat = 2;
2158               status_push_ch(ptr, c);
2159 #endif /* X0212_ENABLE */
2160           }else{
2161               status_disable(ptr);
2162           }
2163           break;
2164       case 1:
2165           if (0xa1 <= c && c <= 0xfe){
2166               status_push_ch(ptr, c);
2167               code_score(ptr);
2168               status_clear(ptr);
2169           }else{
2170               status_disable(ptr);
2171           }
2172           break;
2173 #ifdef X0212_ENABLE
2174       case 2:
2175           if (0xa1 <= c && c <= 0xfe){
2176               ptr->stat = 1;
2177               status_push_ch(ptr, c);
2178           }else{
2179               status_disable(ptr);
2180           }
2181 #endif /* X0212_ENABLE */
2182     }
2183 }
2184
2185 #ifdef UTF8_INPUT_ENABLE
2186 void w_status(struct input_code *ptr, nkf_char c)
2187 {
2188     switch (ptr->stat){
2189       case -1:
2190           status_check(ptr, c);
2191           break;
2192       case 0:
2193           if (c <= DEL){
2194               break;
2195 #ifdef NUMCHAR_OPTION
2196           }else if (is_unicode_capsule(c)){
2197               break;
2198 #endif
2199           }else if (0xc0 <= c && c <= 0xdf){
2200               ptr->stat = 1;
2201               status_push_ch(ptr, c);
2202           }else if (0xe0 <= c && c <= 0xef){
2203               ptr->stat = 2;
2204               status_push_ch(ptr, c);
2205           }else if (0xf0 <= c && c <= 0xf4){
2206               ptr->stat = 3;
2207               status_push_ch(ptr, c);
2208           }else{
2209               status_disable(ptr);
2210           }
2211           break;
2212       case 1:
2213       case 2:
2214           if (0x80 <= c && c <= 0xbf){
2215               status_push_ch(ptr, c);
2216               if (ptr->index > ptr->stat){
2217                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2218                              && ptr->buf[2] == 0xbf);
2219                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2220                            &ptr->buf[0], &ptr->buf[1]);
2221                   if (!bom){
2222                       code_score(ptr);
2223                   }
2224                   status_clear(ptr);
2225               }
2226           }else{
2227               status_disable(ptr);
2228           }
2229           break;
2230       case 3:
2231         if (0x80 <= c && c <= 0xbf){
2232             if (ptr->index < ptr->stat){
2233                 status_push_ch(ptr, c);
2234             } else {
2235                 status_clear(ptr);
2236             }
2237           }else{
2238               status_disable(ptr);
2239           }
2240           break;
2241     }
2242 }
2243 #endif
2244
2245 void code_status(nkf_char c)
2246 {
2247     int action_flag = 1;
2248     struct input_code *result = 0;
2249     struct input_code *p = input_code_list;
2250     while (p->name){
2251         if (!p->status_func) {
2252             ++p;
2253             continue;
2254         }
2255         if (!p->status_func)
2256             continue;
2257         (p->status_func)(p, c);
2258         if (p->stat > 0){
2259             action_flag = 0;
2260         }else if(p->stat == 0){
2261             if (result){
2262                 action_flag = 0;
2263             }else{
2264                 result = p;
2265             }
2266         }
2267         ++p;
2268     }
2269
2270     if (action_flag){
2271         if (result && !estab_f){
2272             set_iconv(TRUE, result->iconv_func);
2273         }else if (c <= DEL){
2274             struct input_code *ptr = input_code_list;
2275             while (ptr->name){
2276                 status_reset(ptr);
2277                 ++ptr;
2278             }
2279         }
2280     }
2281 }
2282
2283 #ifndef WIN32DLL
2284 nkf_char std_getc(FILE *f)
2285 {
2286     if (std_gc_ndx){
2287         return std_gc_buf[--std_gc_ndx];
2288     }
2289     return getc(f);
2290 }
2291 #endif /*WIN32DLL*/
2292
2293 nkf_char std_ungetc(nkf_char c, FILE *f)
2294 {
2295     if (std_gc_ndx == STD_GC_BUFSIZE){
2296         return EOF;
2297     }
2298     std_gc_buf[std_gc_ndx++] = c;
2299     return c;
2300 }
2301
2302 #ifndef WIN32DLL
2303 void std_putc(nkf_char c)
2304 {
2305     if(c!=EOF)
2306       putchar(c);
2307 }
2308 #endif /*WIN32DLL*/
2309
2310 #if !defined(PERL_XS) && !defined(WIN32DLL)
2311 nkf_char noconvert(FILE *f)
2312 {
2313     nkf_char    c;
2314
2315     if (nop_f == 2)
2316         module_connection();
2317     while ((c = (*i_getc)(f)) != EOF)
2318       (*o_putc)(c);
2319     (*o_putc)(EOF);
2320     return 1;
2321 }
2322 #endif
2323
/*
 * Build the output and input processing chains from the option flags.
 *
 * Output side: oconv starts as output_conv and o_putc as std_putc.  Each
 * enabled option saves the current oconv (or o_putc) in its own o_* slot
 * and installs its own function in its place, so the order of the blocks
 * below determines the order of the chain.
 *
 * Input side: i_getc/i_ungetc start as std_getc/std_ungetc and are
 * wrapped the same way by each enabled input option.
 *
 * Finally the initial input converter is chosen from input_f through
 * set_iconv(), and every entry of input_code_list is reinitialized.
 */
void module_connection(void)
{
    oconv = output_conv; 
    o_putc = std_putc;

    /* replace continuation module, from output side */

    /* output redirection */
#ifdef CHECK_OPTION
    if (noout_f || guess_f){
        o_putc = no_putc;
    }
#endif
    if (mimeout_f) {
        o_mputc = o_putc;
        o_putc = mime_putc;
        /* base64_conv is only inserted when mimeout_f is exactly TRUE */
        if (mimeout_f == TRUE) {
            o_base64conv = oconv; oconv = base64_conv;
        }
        /* base64_count = 0; */
    }

    if (crmode_f) {
        o_crconv = oconv; oconv = cr_conv;
    }
    if (rot_f) {
        o_rot_conv = oconv; oconv = rot_conv;
    }
    if (iso2022jp_f) {
        o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
    }
    if (hira_f) {
        o_hira_conv = oconv; oconv = hira_conv;
    }
    if (fold_f) {
        o_fconv = oconv; oconv = fold_conv;
        f_line = 0;
    }
    if (alpha_f || x0201_f) {
        o_zconv = oconv; oconv = z_conv;
    }

    i_getc = std_getc;
    i_ungetc = std_ungetc;
    /* input redirection */
#ifdef INPUT_OPTION
    if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
        i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
    }
    if (url_f){
        i_ugetc = i_getc; i_getc = url_getc;
        i_uungetc = i_ungetc; i_ungetc= url_ungetc;
    }
#endif
#ifdef NUMCHAR_OPTION
    if (numchar_f){
        i_ngetc = i_getc; i_getc = numchar_getc;
        i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
    }
#endif
#ifdef UNICODE_NORMALIZATION
    if (nfc_f && input_f == UTF8_INPUT){
        i_nfc_getc = i_getc; i_getc = nfc_getc;
        i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
    }
#endif
    if (mime_f && mimebuf_f==FIXED_MIME) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
    }
    if (broken_f & 1) {
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
    }
    /* An explicitly requested input code is forced (-TRUE); otherwise
       e_iconv is installed with the established flag cleared. */
    if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
        set_iconv(-TRUE, e_iconv);
    } else if (input_f == SJIS_INPUT) {
        set_iconv(-TRUE, s_iconv);
#ifdef UTF8_INPUT_ENABLE
    } else if (input_f == UTF8_INPUT) {
        set_iconv(-TRUE, w_iconv);
    } else if (input_f == UTF16_INPUT) {
        set_iconv(-TRUE, w_iconv16);
    } else if (input_f == UTF32_INPUT) {
        set_iconv(-TRUE, w_iconv32);
#endif
    } else {
        set_iconv(FALSE, e_iconv);
    }

    /* reinitialize the detection state of every candidate input code */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
}
2422
2423 /*
2424  * Check and Ignore BOM
2425  */
2426 void check_bom(FILE *f)
2427 {
2428     int c2;
2429     switch(c2 = (*i_getc)(f)){
2430     case 0x00:
2431         if((c2 = (*i_getc)(f)) == 0x00){
2432             if((c2 = (*i_getc)(f)) == 0xFE){
2433                 if((c2 = (*i_getc)(f)) == 0xFF){
2434                     if(!input_f){
2435                         set_iconv(TRUE, w_iconv32);
2436                     }
2437                     if (iconv == w_iconv32) {
2438                         input_endian = ENDIAN_BIG;
2439                         return;
2440                     }
2441                     (*i_ungetc)(0xFF,f);
2442                 }else (*i_ungetc)(c2,f);
2443                 (*i_ungetc)(0xFE,f);
2444             }else if(c2 == 0xFF){
2445                 if((c2 = (*i_getc)(f)) == 0xFE){
2446                     if(!input_f){
2447                         set_iconv(TRUE, w_iconv32);
2448                     }
2449                     if (iconv == w_iconv32) {
2450                         input_endian = ENDIAN_2143;
2451                         return;
2452                     }
2453                     (*i_ungetc)(0xFF,f);
2454                 }else (*i_ungetc)(c2,f);
2455                 (*i_ungetc)(0xFF,f);
2456             }else (*i_ungetc)(c2,f);
2457             (*i_ungetc)(0x00,f);
2458         }else (*i_ungetc)(c2,f);
2459         (*i_ungetc)(0x00,f);
2460         break;
2461     case 0xEF:
2462         if((c2 = (*i_getc)(f)) == 0xBB){
2463             if((c2 = (*i_getc)(f)) == 0xBF){
2464                 if(!input_f){
2465                     set_iconv(TRUE, w_iconv);
2466                 }
2467                 if (iconv == w_iconv) {
2468                     return;
2469                 }
2470                 (*i_ungetc)(0xBF,f);
2471             }else (*i_ungetc)(c2,f);
2472             (*i_ungetc)(0xBB,f);
2473         }else (*i_ungetc)(c2,f);
2474         (*i_ungetc)(0xEF,f);
2475         break;
2476     case 0xFE:
2477         if((c2 = (*i_getc)(f)) == 0xFF){
2478             if((c2 = (*i_getc)(f)) == 0x00){
2479                 if((c2 = (*i_getc)(f)) == 0x00){
2480                     if(!input_f){
2481                         set_iconv(TRUE, w_iconv32);
2482                     }
2483                     if (iconv == w_iconv32) {
2484                         input_endian = ENDIAN_3412;
2485                         return;
2486                     }
2487                     (*i_ungetc)(0x00,f);
2488                 }else (*i_ungetc)(c2,f);
2489                 (*i_ungetc)(0x00,f);
2490             }else (*i_ungetc)(c2,f);
2491             if(!input_f){
2492                 set_iconv(TRUE, w_iconv16);
2493             }
2494             if (iconv == w_iconv16) {
2495                 input_endian = ENDIAN_BIG;
2496                 return;
2497             }
2498             (*i_ungetc)(0xFF,f);
2499         }else (*i_ungetc)(c2,f);
2500         (*i_ungetc)(0xFE,f);
2501         break;
2502     case 0xFF:
2503         if((c2 = (*i_getc)(f)) == 0xFE){
2504             if((c2 = (*i_getc)(f)) == 0x00){
2505                 if((c2 = (*i_getc)(f)) == 0x00){
2506                     if(!input_f){
2507                         set_iconv(TRUE, w_iconv32);
2508                     }
2509                     if (iconv == w_iconv32) {
2510                         input_endian = ENDIAN_LITTLE;
2511                         return;
2512                     }
2513                     (*i_ungetc)(0x00,f);
2514                 }else (*i_ungetc)(c2,f);
2515                 (*i_ungetc)(0x00,f);
2516             }else (*i_ungetc)(c2,f);
2517             if(!input_f){
2518                 set_iconv(TRUE, w_iconv16);
2519             }
2520             if (iconv == w_iconv16) {
2521                 input_endian = ENDIAN_LITTLE;
2522                 return;
2523             }
2524             (*i_ungetc)(0xFE,f);
2525         }else (*i_ungetc)(c2,f);
2526         (*i_ungetc)(0xFF,f);
2527         break;
2528     default:
2529         (*i_ungetc)(c2,f);
2530         break;
2531     }
2532 }
2533
2534 /*
2535    Conversion main loop. Code detection only. 
2536  */
2537
2538 nkf_char kanji_convert(FILE *f)
2539 {
2540     nkf_char    c3, c2=0, c1, c0=0;
2541     int is_8bit = FALSE;
2542
2543     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2544 #ifdef UTF8_INPUT_ENABLE
2545        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2546 #endif
2547       ){
2548         is_8bit = TRUE;
2549     }
2550
2551     input_mode = ASCII;
2552     output_mode = ASCII;
2553     shift_mode = FALSE;
2554
2555 #define NEXT continue      /* no output, get next */
2556 #define SEND ;             /* output c1 and c2, get next */
2557 #define LAST break         /* end of loop, go closing  */
2558
2559     module_connection();
2560     check_bom(f);
2561
2562     while ((c1 = (*i_getc)(f)) != EOF) {
2563 #ifdef INPUT_CODE_FIX
2564         if (!input_f)
2565 #endif
2566             code_status(c1);
2567         if (c2) {
2568             /* second byte */
2569             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2570                 /* in case of 8th bit is on */
2571                 if (!estab_f&&!mime_decode_mode) {
2572                     /* in case of not established yet */
2573                     /* It is still ambiguious */
2574                     if (h_conv(f, c2, c1)==EOF) 
2575                         LAST;
2576                     else 
2577                         c2 = 0;
2578                     NEXT;
2579                 } else {
2580                     /* in case of already established */
2581                     if (c1 < AT) {
2582                         /* ignore bogus code and not CP5022x UCD */
2583                         c2 = 0;
2584                         NEXT;
2585                     } else {
2586                         SEND;
2587                     }
2588                 }
2589             } else
2590                 /* second byte, 7 bit code */
2591                 /* it might be kanji shitfted */
2592                 if ((c1 == DEL) || (c1 <= SPACE)) {
2593                     /* ignore bogus first code */
2594                     c2 = 0;
2595                     NEXT;
2596                 } else
2597                     SEND;
2598         } else {
2599             /* first byte */
2600 #ifdef UTF8_INPUT_ENABLE
2601             if (iconv == w_iconv16) {
2602                 if (input_endian == ENDIAN_BIG) {
2603                     c2 = c1;
2604                     if ((c1 = (*i_getc)(f)) != EOF) {
2605                         if (0xD8 <= c2 && c2 <= 0xDB) {
2606                             if ((c0 = (*i_getc)(f)) != EOF) {
2607                                 c0 <<= 8;
2608                                 if ((c3 = (*i_getc)(f)) != EOF) {
2609                                     c0 |= c3;
2610                                 } else c2 = EOF;
2611                             } else c2 = EOF;
2612                         }
2613                     } else c2 = EOF;
2614                 } else {
2615                     if ((c2 = (*i_getc)(f)) != EOF) {
2616                         if (0xD8 <= c2 && c2 <= 0xDB) {
2617                             if ((c3 = (*i_getc)(f)) != EOF) {
2618                                 if ((c0 = (*i_getc)(f)) != EOF) {
2619                                     c0 <<= 8;
2620                                     c0 |= c3;
2621                                 } else c2 = EOF;
2622                             } else c2 = EOF;
2623                         }
2624                     } else c2 = EOF;
2625                 }
2626                 SEND;
2627             } else if(iconv == w_iconv32){
2628                 int c3 = c1;
2629                 if((c2 = (*i_getc)(f)) != EOF &&
2630                    (c1 = (*i_getc)(f)) != EOF &&
2631                    (c0 = (*i_getc)(f)) != EOF){
2632                     switch(input_endian){
2633                     case ENDIAN_BIG:
2634                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2635                         break;
2636                     case ENDIAN_LITTLE:
2637                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2638                         break;
2639                     case ENDIAN_2143:
2640                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2641                         break;
2642                     case ENDIAN_3412:
2643                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2644                         break;
2645                     }
2646                     c2 = 0;
2647                 }else{
2648                     c2 = EOF;
2649                 }
2650                 SEND;
2651             } else
2652 #endif
2653 #ifdef NUMCHAR_OPTION
2654             if (is_unicode_capsule(c1)){
2655                 SEND;
2656             } else
2657 #endif
2658             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2659                 /* 8 bit code */
2660                 if (!estab_f && !iso8859_f) {
2661                     /* not established yet */
2662                     c2 = c1;
2663                     NEXT;
2664                 } else { /* estab_f==TRUE */
2665                     if (iso8859_f) {
2666                         c2 = ISO8859_1;
2667                         c1 &= 0x7f;
2668                         SEND;
2669                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2670                         /* SJIS X0201 Case... */
2671                         if(iso2022jp_f && x0201_f==NO_X0201) {
2672                             (*oconv)(GETA1, GETA2);
2673                             NEXT;
2674                         } else {
2675                             c2 = X0201;
2676                             c1 &= 0x7f;
2677                             SEND;
2678                         }
2679                     } else if (c1==SSO && iconv != s_iconv) {
2680                         /* EUC X0201 Case */
2681                         c1 = (*i_getc)(f);  /* skip SSO */
2682                         code_status(c1);
2683                         if (SSP<=c1 && c1<0xe0) {
2684                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2685                                 (*oconv)(GETA1, GETA2);
2686                                 NEXT;
2687                             } else {
2688                                 c2 = X0201;
2689                                 c1 &= 0x7f;
2690                                 SEND;
2691                             }
2692                         } else  { /* bogus code, skip SSO and one byte */
2693                             NEXT;
2694                         }
2695                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2696                                (c1 == 0xFD || c1 == 0xFE)) {
2697                         /* CP10001 */
2698                         c2 = X0201;
2699                         c1 &= 0x7f;
2700                         SEND;
2701                     } else {
2702                        /* already established */
2703                        c2 = c1;
2704                        NEXT;
2705                     }
2706                 }
2707             } else if ((c1 > SPACE) && (c1 != DEL)) {
2708                 /* in case of Roman characters */
2709                 if (shift_mode) { 
2710                     /* output 1 shifted byte */
2711                     if (iso8859_f) {
2712                         c2 = ISO8859_1;
2713                         SEND;
2714                     } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2715                       /* output 1 shifted byte */
2716                         if(iso2022jp_f && x0201_f==NO_X0201) {
2717                             (*oconv)(GETA1, GETA2);
2718                             NEXT;
2719                         } else {
2720                             c2 = X0201;
2721                             SEND;
2722                         }
2723                     } else {
2724                         /* look like bogus code */
2725                         NEXT;
2726                     }
2727                 } else if (input_mode == X0208 || input_mode == X0212 ||
2728                            input_mode == X0213_1 || input_mode == X0213_2) {
2729                     /* in case of Kanji shifted */
2730                     c2 = c1;
2731                     NEXT;
2732                 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2733                     /* Check MIME code */
2734                     if ((c1 = (*i_getc)(f)) == EOF) {
2735                         (*oconv)(0, '=');
2736                         LAST;
2737                     } else if (c1 == '?') {
2738                         /* =? is mime conversion start sequence */
2739                         if(mime_f == STRICT_MIME) {
2740                             /* check in real detail */
2741                             if (mime_begin_strict(f) == EOF) 
2742                                 LAST;
2743                             else
2744                                 NEXT;
2745                         } else if (mime_begin(f) == EOF) 
2746                             LAST;
2747                         else
2748                             NEXT;
2749                     } else {
2750                         (*oconv)(0, '=');
2751                         (*i_ungetc)(c1,f);
2752                         NEXT;
2753                     }
2754                 } else {
2755                     /* normal ASCII code */ 
2756                     SEND;
2757                 }
2758             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {\r
2759                 shift_mode = FALSE; 
2760                 NEXT;
2761             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {\r
2762                 shift_mode = TRUE; 
2763                 NEXT;
2764             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {\r
2765                 if ((c1 = (*i_getc)(f)) == EOF) {
2766                     /*  (*oconv)(0, ESC); don't send bogus code */
2767                     LAST;
2768                 } else if (c1 == '$') {
2769                     if ((c1 = (*i_getc)(f)) == EOF) {
2770                         /*
2771                         (*oconv)(0, ESC); don't send bogus code 
2772                         (*oconv)(0, '$'); */
2773                         LAST;
2774                     } else if (c1 == '@'|| c1 == 'B') {
2775                         /* This is kanji introduction */
2776                         input_mode = X0208;
2777                         shift_mode = FALSE;
2778                         set_input_codename("ISO-2022-JP");
2779 #ifdef CHECK_OPTION
2780                         debug(input_codename);
2781 #endif
2782                         NEXT;
2783                     } else if (c1 == '(') {
2784                         if ((c1 = (*i_getc)(f)) == EOF) {
2785                             /* don't send bogus code 
2786                             (*oconv)(0, ESC);
2787                             (*oconv)(0, '$');
2788                             (*oconv)(0, '(');
2789                                 */
2790                             LAST;
2791                         } else if (c1 == '@'|| c1 == 'B') {
2792                             /* This is kanji introduction */
2793                             input_mode = X0208;
2794                             shift_mode = FALSE;
2795                             NEXT;
2796 #ifdef X0212_ENABLE
2797                         } else if (c1 == 'D'){
2798                             input_mode = X0212;
2799                             shift_mode = FALSE;
2800                             NEXT;
2801 #endif /* X0212_ENABLE */
2802                         } else if (c1 == (X0213_1&0x7F)){
2803                             input_mode = X0213_1;
2804                             shift_mode = FALSE;
2805                             NEXT;
2806                         } else if (c1 == (X0213_2&0x7F)){
2807                             input_mode = X0213_2;
2808                             shift_mode = FALSE;
2809                             NEXT;
2810                         } else {
2811                             /* could be some special code */
2812                             (*oconv)(0, ESC);
2813                             (*oconv)(0, '$');
2814                             (*oconv)(0, '(');
2815                             (*oconv)(0, c1);
2816                             NEXT;
2817                         }
2818                     } else if (broken_f&0x2) {
2819                         /* accept any ESC-(-x as broken code ... */
2820                         input_mode = X0208;
2821                         shift_mode = FALSE;
2822                         NEXT;
2823                     } else {
2824                         (*oconv)(0, ESC);
2825                         (*oconv)(0, '$');
2826                         (*oconv)(0, c1);
2827                         NEXT;
2828                     }
2829                 } else if (c1 == '(') {
2830                     if ((c1 = (*i_getc)(f)) == EOF) {
2831                         /* don't send bogus code 
2832                         (*oconv)(0, ESC);
2833                         (*oconv)(0, '('); */
2834                         LAST;
2835                     } else {
2836                         if (c1 == 'I') {
2837                             /* This is X0201 kana introduction */
2838                             input_mode = X0201; shift_mode = X0201;
2839                             NEXT;
2840                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2841                             /* This is X0208 kanji introduction */
2842                             input_mode = ASCII; shift_mode = FALSE;
2843                             NEXT;
2844                         } else if (broken_f&0x2) {
2845                             input_mode = ASCII; shift_mode = FALSE;
2846                             NEXT;
2847                         } else {
2848                             (*oconv)(0, ESC);
2849                             (*oconv)(0, '(');
2850                             /* maintain various input_mode here */
2851                             SEND;
2852                         }
2853                     }
2854                } else if ( c1 == 'N' || c1 == 'n' ){
2855                    /* SS2 */
2856                    c3 = (*i_getc)(f);  /* skip SS2 */
2857                    if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2858                        c1 = c3;
2859                        c2 = X0201;
2860                        SEND;
2861                    }else{
2862                        (*i_ungetc)(c3, f);
2863                        /* lonely ESC  */
2864                        (*oconv)(0, ESC);
2865                        SEND;
2866                    }
2867                 } else {
2868                     /* lonely ESC  */
2869                     (*oconv)(0, ESC);
2870                     SEND;
2871                 }
2872             } else if (c1 == ESC && iconv == s_iconv) {
2873                 /* ESC in Shift_JIS */
2874                 if ((c1 = (*i_getc)(f)) == EOF) {
2875                     /*  (*oconv)(0, ESC); don't send bogus code */
2876                     LAST;
2877                 } else if (c1 == '$') {
2878                     /* J-PHONE emoji */
2879                     if ((c1 = (*i_getc)(f)) == EOF) {
2880                         /*
2881                            (*oconv)(0, ESC); don't send bogus code 
2882                            (*oconv)(0, '$'); */
2883                         LAST;
2884                     } else {
2885                         if (('E' <= c1 && c1 <= 'G') ||
2886                             ('O' <= c1 && c1 <= 'Q')) {
2887                             /*
2888                                NUM : 0 1 2 3 4 5
2889                                BYTE: G E F O P Q
2890                                C%7 : 1 6 0 2 3 4
2891                                C%7 : 0 1 2 3 4 5 6
2892                                NUM : 2 0 3 4 5 X 1
2893                              */
2894                             static const int jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2895                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SPACE + 0xE000 + CLASS_UNICODE;
2896                             while ((c1 = (*i_getc)(f)) != EOF) {
2897                                 if (SPACE <= c1 && c1 <= 'z') {
2898                                     (*oconv)(0, c1 + c0);
2899                                 } else break; /* c1 == SO */
2900                             }
2901                         }
2902                     }
2903                     if (c1 == EOF) LAST;
2904                     NEXT;
2905                 } else {
2906                     /* lonely ESC  */
2907                     (*oconv)(0, ESC);
2908                     SEND;
2909                 }
2910             } else if (c1 == NL || c1 == CR) {
2911                 if (broken_f&4) {
2912                     input_mode = ASCII; set_iconv(FALSE, 0);
2913                     SEND;
2914                 } else if (mime_decode_f && !mime_decode_mode){
2915                     if (c1 == NL) {
2916                         if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2917                             i_ungetc(SPACE,f);
2918                             continue;
2919                         } else {
2920                             i_ungetc(c1,f);
2921                         }
2922                         c1 = NL;
2923                         SEND;
2924                     } else  { /* if (c1 == CR)*/
2925                         if ((c1=(*i_getc)(f))!=EOF) {
2926                             if (c1==SPACE) {
2927                                 i_ungetc(SPACE,f);
2928                                 continue;
2929                             } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2930                                 i_ungetc(SPACE,f);
2931                                 continue;
2932                             } else {
2933                                 i_ungetc(c1,f);
2934                             }
2935                             i_ungetc(NL,f);
2936                         } else {
2937                             i_ungetc(c1,f);
2938                         }
2939                         c1 = CR;
2940                         SEND;
2941                     }
2942                 }
2943                 if (!crmode_f) {
2944                     if (prev_cr && c1 == NL) crmode_f = CRLF;
2945                     else crmode_f = c1;
2946                 }
2947             } else if (c1 == DEL && input_mode == X0208 ) {
2948                 /* CP5022x */
2949                 c2 = c1;
2950                 NEXT;
2951             } else 
2952                 SEND;
2953         }
2954         /* send: */
2955         switch(input_mode){
2956         case ASCII:
2957             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2958             case -2:
2959                 /* 4 bytes UTF-8 */
2960                 if ((c0 = (*i_getc)(f)) != EOF) {
2961                     code_status(c0);
2962                     c0 <<= 8;
2963                     if ((c3 = (*i_getc)(f)) != EOF) {
2964                         code_status(c3);
2965                         (*iconv)(c2, c1, c0|c3);
2966                     }
2967                 }
2968                 break;
2969             case -1:
2970                 /* 3 bytes EUC or UTF-8 */
2971                 if ((c0 = (*i_getc)(f)) != EOF) {
2972                     code_status(c0);
2973                     (*iconv)(c2, c1, c0);
2974                 }
2975                 break;
2976             }
2977             break;
2978         case X0208:
2979         case X0213_1:
2980             if (ms_ucs_map_f &&
2981                 0x7F <= c2 && c2 <= 0x92 &&
2982                 0x21 <= c1 && c1 <= 0x7E) {
2983                 /* CP932 UDC */
2984                 if(c1 == 0x7F) return 0;
2985                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2986                 c2 = 0;
2987             }
2988             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2989             break;
2990 #ifdef X0212_ENABLE
2991         case X0212:
2992             (*oconv)(PREFIX_EUCG3 | c2, c1);
2993             break;
2994 #endif /* X0212_ENABLE */
2995         case X0213_2:
2996             (*oconv)(PREFIX_EUCG3 | c2, c1);
2997             break;
2998         default:
2999             (*oconv)(input_mode, c1);  /* other special case */
3000         }
3001
3002         c2 = 0;
3003         c0 = 0;
3004         continue;
3005         /* goto next_word */
3006     }
3007
3008     /* epilogue */
3009     (*iconv)(EOF, 0, 0);
3010     if (!is_inputcode_set)
3011     {
3012         if (is_8bit) {
3013             struct input_code *p = input_code_list;
3014             struct input_code *result = p;
3015             while (p->name){
3016                 if (p->score < result->score) result = p;
3017                 ++p;
3018             }
3019             set_input_codename(result->name);
3020         }
3021     }
3022     return 1;
3023 }
3024
3025 nkf_char
3026 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3027 {
3028     nkf_char ret, c3, c0;
3029     int hold_index;
3030
3031
3032     /** it must NOT be in the kanji shifte sequence      */
3033     /** it must NOT be written in JIS7                   */
3034     /** and it must be after 2 byte 8bit code            */
3035
3036     hold_count = 0;
3037     push_hold_buf(c2);
3038     push_hold_buf(c1);
3039
3040     while ((c1 = (*i_getc)(f)) != EOF) {
3041         if (c1 == ESC){
3042             (*i_ungetc)(c1,f);
3043             break;
3044         }
3045         code_status(c1);
3046         if (push_hold_buf(c1) == EOF || estab_f){
3047             break;
3048         }
3049     }
3050
3051     if (!estab_f){
3052         struct input_code *p = input_code_list;
3053         struct input_code *result = p;
3054         if (c1 == EOF){
3055             code_status(c1);
3056         }
3057         while (p->name){
3058             if (p->status_func && p->score < result->score){
3059                 result = p;
3060             }
3061             ++p;
3062         }
3063         set_iconv(TRUE, result->iconv_func);
3064     }
3065
3066
3067     /** now,
3068      ** 1) EOF is detected, or
3069      ** 2) Code is established, or
3070      ** 3) Buffer is FULL (but last word is pushed)
3071      **
3072      ** in 1) and 3) cases, we continue to use
3073      ** Kanji codes by oconv and leave estab_f unchanged.
3074      **/
3075
3076     ret = c1;
3077     hold_index = 0;
3078     while (hold_index < hold_count){
3079         c2 = hold_buf[hold_index++];
3080         if (c2 <= DEL
3081 #ifdef NUMCHAR_OPTION
3082             || is_unicode_capsule(c2)
3083 #endif
3084             ){
3085             (*iconv)(0, c2, 0);
3086             continue;
3087         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3088             (*iconv)(X0201, c2, 0);
3089             continue;
3090         }
3091         if (hold_index < hold_count){
3092             c1 = hold_buf[hold_index++];
3093         }else{
3094             c1 = (*i_getc)(f);
3095             if (c1 == EOF){
3096                 c3 = EOF;
3097                 break;
3098             }
3099             code_status(c1);
3100         }
3101         c0 = 0;
3102         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3103         case -2:
3104             /* 4 bytes UTF-8 */
3105             if (hold_index < hold_count){
3106                 c0 = hold_buf[hold_index++];
3107             } else if ((c0 = (*i_getc)(f)) == EOF) {
3108                 ret = EOF;
3109                 break;
3110             } else {
3111                 code_status(c0);
3112                 c0 <<= 8;
3113                 if (hold_index < hold_count){
3114                     c3 = hold_buf[hold_index++];
3115                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3116                     c0 = ret = EOF;
3117                     break;
3118                 } else {
3119                     code_status(c3);
3120                     (*iconv)(c2, c1, c0|c3);
3121                 }
3122             }
3123             break;
3124         case -1:
3125             /* 3 bytes EUC or UTF-8 */
3126             if (hold_index < hold_count){
3127                 c0 = hold_buf[hold_index++];
3128             } else if ((c0 = (*i_getc)(f)) == EOF) {
3129                 ret = EOF;
3130                 break;
3131             } else {
3132                 code_status(c0);
3133             }
3134             (*iconv)(c2, c1, c0);
3135             break;
3136         }
3137         if (c0 == EOF) break;
3138     }
3139     return ret;
3140 }
3141
3142 nkf_char push_hold_buf(nkf_char c2)
3143 {
3144     if (hold_count >= HOLD_SIZE*2)
3145         return (EOF);
3146     hold_buf[hold_count++] = (unsigned char)c2;
3147     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3148 }
3149
3150 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3151 {
3152 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3153     nkf_char val;
3154 #endif
3155     static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3156 #ifdef SHIFTJIS_CP932
3157     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3158         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3159         if (val){
3160             c2 = val >> 8;
3161             c1 = val & 0xff;
3162         }
3163     }
3164     if (cp932inv_f
3165         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3166         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3167         if (c){
3168             c2 = c >> 8;
3169             c1 = c & 0xff;
3170         }
3171     }
3172 #endif /* SHIFTJIS_CP932 */
3173 #ifdef X0212_ENABLE
3174     if (!x0213_f && is_ibmext_in_sjis(c2)){
3175         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3176         if (val){
3177             if (val > 0x7FFF){
3178                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3179                 c1 = val & 0xff;
3180             }else{
3181                 c2 = val >> 8;
3182                 c1 = val & 0xff;
3183             }
3184             if (p2) *p2 = c2;
3185             if (p1) *p1 = c1;
3186             return 0;
3187         }
3188     }
3189 #endif
3190     if(c2 >= 0x80){
3191         if(x0213_f && c2 >= 0xF0){
3192             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3193                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3194             }else{ /* 78<=k<=94 */
3195                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3196                 if (0x9E < c1) c2++;
3197             }
3198         }else{
3199             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3200             if (0x9E < c1) c2++;
3201         }
3202         if (c1 < 0x9F)
3203             c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
3204         else {
3205             c1 = c1 - 0x7E;
3206         }
3207     }
3208
3209 #ifdef X0212_ENABLE
3210     c2 = x0212_unshift(c2);
3211 #endif
3212     if (p2) *p2 = c2;
3213     if (p1) *p1 = c1;
3214     return 0;
3215 }
3216
3217 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3218 {
3219     if (c2 == X0201) {
3220         c1 &= 0x7f;
3221     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3222         /* NOP */
3223     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3224         /* CP932 UDC */
3225         if(c1 == 0x7F) return 0;
3226         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3227         c2 = 0;
3228     } else {
3229         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3230         if (ret) return ret;
3231     }
3232     (*oconv)(c2, c1);
3233     return 0;
3234 }
3235
3236 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3237 {
3238     if (c2 == X0201) {
3239         c1 &= 0x7f;
3240 #ifdef X0212_ENABLE
3241     }else if (c2 == 0x8f){
3242         if (c0 == 0){
3243             return -1;
3244         }
3245         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3246             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3247             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3248             c2 = 0;
3249         } else {
3250             c2 = (c2 << 8) | (c1 & 0x7f);
3251             c1 = c0 & 0x7f;
3252 #ifdef SHIFTJIS_CP932
3253             if (cp51932_f){
3254                 nkf_char s2, s1;
3255                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3256                     s2e_conv(s2, s1, &c2, &c1);
3257                     if (c2 < 0x100){
3258                         c1 &= 0x7f;
3259                         c2 &= 0x7f;
3260                     }
3261                 }
3262             }
3263 #endif /* SHIFTJIS_CP932 */
3264         }
3265 #endif /* X0212_ENABLE */
3266     } else if (c2 == SSO){
3267         c2 = X0201;
3268         c1 &= 0x7f;
3269     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3270         /* NOP */
3271     } else {
3272         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3273             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3274             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3275             c2 = 0;
3276         } else {
3277             c1 &= 0x7f;
3278             c2 &= 0x7f;
3279 #ifdef SHIFTJIS_CP932
3280             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3281                 nkf_char s2, s1;
3282                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3283                     s2e_conv(s2, s1, &c2, &c1);
3284                     if (c2 < 0x100){
3285                         c1 &= 0x7f;
3286                         c2 &= 0x7f;
3287                     }
3288                 }
3289             }
3290 #endif /* SHIFTJIS_CP932 */
3291         }
3292     }
3293     (*oconv)(c2, c1);
3294     return 0;
3295 }
3296
3297 #ifdef UTF8_INPUT_ENABLE
3298 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3299 {
3300     nkf_char ret = 0;
3301
3302     if (!c1){
3303         *p2 = 0;
3304         *p1 = c2;
3305     }else if (0xc0 <= c2 && c2 <= 0xef) {
3306         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3307 #ifdef NUMCHAR_OPTION
3308         if (ret > 0){
3309             if (p2) *p2 = 0;
3310             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3311             ret = 0;
3312         }
3313 #endif
3314     }
3315     return ret;
3316 }
3317
3318 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3319 {
3320     nkf_char ret = 0;
3321     static const int w_iconv_utf8_1st_byte[] =
3322     { /* 0xC0 - 0xFF */
3323         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3324         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3325         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3326         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3327     
3328     if (c2 < 0 || 0xff < c2) {
3329     }else if (c2 == 0) { /* 0 : 1 byte*/
3330         c0 = 0;
3331     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3332         return 0;
3333     } else{
3334         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3335         case 21:
3336             if (c1 < 0x80 || 0xBF < c1) return 0;
3337             break;
3338         case 30:
3339             if (c0 == 0) return -1;
3340             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3341                 return 0;
3342             break;
3343         case 31:
3344         case 33:
3345             if (c0 == 0) return -1;
3346             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3347                 return 0;
3348             break;
3349         case 32:
3350             if (c0 == 0) return -1;
3351             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3352                 return 0;
3353             break;
3354         case 40:
3355             if (c0 == 0) return -2;
3356             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3357                 return 0;
3358             break;
3359         case 41:
3360             if (c0 == 0) return -2;
3361             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3362                 return 0;
3363             break;
3364         case 42:
3365             if (c0 == 0) return -2;
3366             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3367                 return 0;
3368             break;
3369         default:
3370             return 0;
3371             break;
3372         }
3373     }
3374     if (c2 == 0 || c2 == EOF){
3375     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3376         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3377         c2 = 0;
3378     } else {
3379         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3380     }
3381     if (ret == 0){
3382         (*oconv)(c2, c1);
3383     }
3384     return ret;
3385 }
3386 #endif
3387
3388 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3389 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3390 {
3391     val &= VALUE_MASK;
3392     if (val < 0x80){
3393         *p2 = val;
3394         *p1 = 0;
3395         *p0 = 0;
3396     }else if (val < 0x800){
3397         *p2 = 0xc0 | (val >> 6);
3398         *p1 = 0x80 | (val & 0x3f);
3399         *p0 = 0;
3400     } else if (val <= NKF_INT32_C(0xFFFF)) {
3401         *p2 = 0xe0 | (val >> 12);
3402         *p1 = 0x80 | ((val >> 6) & 0x3f);
3403         *p0 = 0x80 | (val        & 0x3f);
3404     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3405         *p2 = 0xe0 |  (val >> 16);
3406         *p1 = 0x80 | ((val >> 12) & 0x3f);
3407         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3408     } else {
3409         *p2 = 0;
3410         *p1 = 0;
3411         *p0 = 0;
3412     }
3413 }
3414 #endif
3415
3416 #ifdef UTF8_INPUT_ENABLE
3417 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3418 {
3419     nkf_char val;
3420     if (c2 >= 0xf8) {
3421         val = -1;
3422     } else if (c2 >= 0xf0){
3423         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3424         val = (c2 & 0x0f) << 18;
3425         val |= (c1 & 0x3f) << 12;
3426         val |= (c0 & 0x3f00) >> 2;
3427         val |= (c0 & 0x3f);
3428     }else if (c2 >= 0xe0){
3429         val = (c2 & 0x0f) << 12;
3430         val |= (c1 & 0x3f) << 6;
3431         val |= (c0 & 0x3f);
3432     }else if (c2 >= 0xc0){
3433         val = (c2 & 0x1f) << 6;
3434         val |= (c1 & 0x3f);
3435     }else{
3436         val = c2;
3437     }
3438     return val;
3439 }
3440
3441 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3442 {
3443     nkf_char c2, c1, c0;
3444     nkf_char ret = 0;
3445     val &= VALUE_MASK;
3446     if (val < 0x80){
3447         *p2 = 0;
3448         *p1 = val;
3449     }else{
3450         w16w_conv(val, &c2, &c1, &c0);
3451         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3452 #ifdef NUMCHAR_OPTION
3453         if (ret > 0){
3454             *p2 = 0;
3455             *p1 = CLASS_UNICODE | val;
3456             ret = 0;
3457         }
3458 #endif
3459     }
3460     return ret;
3461 }
3462 #endif
3463
3464 #ifdef UTF8_INPUT_ENABLE
3465 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3466 {
3467     nkf_char ret = 0;
3468     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3469         (*oconv)(c2, c1);
3470         return 0;
3471     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3472         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3473             return -2;
3474         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3475         c2 = 0;
3476     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3477         /*
3478            return 2;
3479         */
3480         return 1;
3481     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3482     if (ret) return ret;
3483     (*oconv)(c2, c1);
3484     return 0;
3485 }
3486
3487 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3488 {
3489     int ret = 0;
3490
3491     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3492     } else if (is_unicode_bmp(c1)) {
3493         ret = w16e_conv(c1, &c2, &c1);
3494     } else {
3495         c2 = 0;
3496         c1 =  CLASS_UNICODE | c1;
3497     }
3498     if (ret) return ret;
3499     (*oconv)(c2, c1);
3500     return 0;
3501 }
3502
3503 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3504 {
3505     const unsigned short *const *pp;
3506     const unsigned short *const *const *ppp;
3507     static const int no_best_fit_chars_table_C2[] =
3508     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3509         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3510         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3511         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3512     static const int no_best_fit_chars_table_C2_ms[] =
3513     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3514         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3515         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3516         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3517     static const int no_best_fit_chars_table_932_C2[] =
3518     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3519         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3520         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3521         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3522     static const int no_best_fit_chars_table_932_C3[] =
3523     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3524         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3525         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3526         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3527     nkf_char ret = 0;
3528
3529     if(c2 < 0x80){
3530         *p2 = 0;
3531         *p1 = c2;
3532     }else if(c2 < 0xe0){
3533         if(no_best_fit_chars_f){
3534             if(ms_ucs_map_f == UCS_MAP_CP932){
3535                 switch(c2){
3536                 case 0xC2:
3537                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3538                     break;
3539                 case 0xC3:
3540                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3541                     break;
3542                 }
3543             }else if(!cp932inv_f){
3544                 switch(c2){
3545                 case 0xC2:
3546                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3547                     break;
3548                 case 0xC3:
3549                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3550                     break;
3551                 }
3552             }else if(ms_ucs_map_f == UCS_MAP_MS){
3553                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3554             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3555                 switch(c2){
3556                 case 0xC2:
3557                     switch(c1){
3558                     case 0xA2:
3559                     case 0xA3:
3560                     case 0xA5:
3561