OSDN Git Service

00d36d65f94e0462e57078b42b38cf6f3aa0cde8
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B 
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program 
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.  
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30 ** UTF-8 \e$B%5%]!<%H$K$D$$$F\e(B
31 **    \e$B=>Mh$N\e(B nkf \e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9\e(B
32 **    nkf -e \e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G\e(B UTF-8 \e$B$HH=Dj$5$l$l$P!"\e(B
33 **    \e$B$=$N$^$^\e(B euc-jp \e$B$KJQ49$5$l$^$9\e(B
34 **
35 **    \e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#\e(B
36 **    (\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O\e(B)
37 **
38 **    \e$B2?$+LdBj$r8+$D$1$?$i!"\e(B
39 **        E-Mail: furukawa@tcp-ip.or.jp
40 **    \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.118 2006/11/10 09:45:39 naruse Exp $ */
43 #define NKF_VERSION "2.0.8"
44 #define NKF_RELEASE_DATE "2006-11-04"
45 #include "config.h"
46 #include "utf8tbl.h"
47
48 #define COPY_RIGHT \
49     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50     "Copyright (C) 2002-2006 Kono, Furukawa, Naruse, mastodon"
51
52
53 /*
54 **
55 **
56 **
57 ** USAGE:       nkf [flags] [file] 
58 **
59 ** Flags:
60 ** b    Output is buffered             (DEFAULT)
61 ** u    Output is unbuffered
62 **
63 ** t    no operation
64 **
65 ** j    Output code is JIS 7 bit        (DEFAULT SELECT) 
66 ** s    Output code is MS Kanji         (DEFAULT SELECT) 
67 ** e    Output code is AT&T JIS         (DEFAULT SELECT) 
68 ** w    Output code is UTF-8            (DEFAULT SELECT) 
69 ** l    Output code is JIS 7bit and ISO8859-1 Latin-1
70 **
71 ** m    MIME conversion for ISO-2022-JP
72 ** I    Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M    MIME output conversion 
76 **
77 ** r  {de/en}crypt ROT13/47
78 **
79 ** v  display Version
80 **
81 ** T  Text mode output        (for MS-DOS)
82 **
83 ** x    Do not convert X0201 kana into X0208
84 ** Z    Convert X0208 alphabet to ASCII
85 **
86 ** f60  fold option
87 **
88 ** m    MIME decode
89 ** B    try to fix broken JIS, missing Escape
90 ** B[1-9]  broken level
91 **
92 ** O   Output to 'nkf.out' file or last file name
93 ** d   Delete \r in line feed 
94 ** c   Add \r in line feed 
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
97 **
98 **/
99
/*
 * Platform normalization.
 * If any of the DOS / Windows / OS/2 compiler or target macros tested below
 * is defined and MSDOS is not yet defined, define MSDOS.  Only inside that
 * branch, __WIN32__ is additionally defined when the toolchain provides
 * __Win32__ or _WIN32 but not __WIN32__ itself.
 */
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
101 #define MSDOS
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
103 #define __WIN32__
104 #endif
105 #endif
106
107 #ifdef PERL_XS
108 #undef OVERWRITE
109 #endif
110
111 #ifndef PERL_XS
112 #include <stdio.h>
113 #endif
114
115 #include <stdlib.h>
116 #include <string.h>
117
118 #if defined(MSDOS) || defined(__OS2__)
119 #include <fcntl.h>
120 #include <io.h>
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
123 #endif
124 #endif
125
/*
 * setbinmode(fp): put the stdio stream fp into binary mode.
 * The expansion depends on the toolchain when MSDOS is defined:
 *   LSI_C      -> fsetbin(fp)
 *   __DJGPP__  -> djgpp_setbinmode(fp)
 *   otherwise  -> setmode(fileno(fp), O_BINARY)
 * When MSDOS is not defined the macro expands to nothing.
 */
126 #ifdef MSDOS
127 #ifdef LSI_C
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
134 #endif
135 #else /* UNIX */
136 #define setbinmode(fp)
137 #endif
138
139 #if defined(__DJGPP__)
140 void  djgpp_setbinmode(FILE *fp)
141 {
142     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
143     int fd, m;
144     fd = fileno(fp);
145     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146     __file_handle_set(fd, m);
147 }
148 #endif
149
/*
 * setvbuffer(fp, buf, size): attach the caller-supplied buffer buf of
 * size bytes to stream fp.  Uses setvbuf() with full buffering when the
 * C library defines _IOFBF, and falls back to setbuffer() otherwise.
 */
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
152 #else /* BSD */
153 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
154 #endif
155
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
158 #define         EASYWIN
159 #ifndef __WIN16__
160 #define __WIN16__
161 #endif
162 #include <windows.h>
163 #endif
164
165 #ifdef OVERWRITE
166 /* added by satoru@isoternet.org */
167 #if defined(__EMX__)
168 #include <sys/types.h>
169 #endif
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
172 #include <unistd.h>
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
175 #else
176 #include <utime.h>
177 #endif
178 #else /* defined(MSDOS) */
179 #ifdef __WIN32__
180 #ifdef __BORLANDC__ /* BCC32 */
181 #include <utime.h>
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
189 #include <utime.h>
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
192 #endif
193 #endif
194 #endif
195
196 #define         FALSE   0
197 #define         TRUE    1
198
199 /* state of output_mode and input_mode  
200
201    c2           0 means ASCII
202                 X0201
203                 ISO8859_1
204                 X0208
205                 EOF      all termination
206    c1           32bit data
207
208  */
209
210 #define         ASCII           0
211 #define         X0208           1
212 #define         X0201           2
213 #define         ISO8859_1       8
214 #define         NO_X0201        3
215 #define         X0212      0x2844
216 #define         X0213_1    0x284F
217 #define         X0213_2    0x2850
218
219 /* Input Assumption */
220
221 #define         JIS_INPUT       4
222 #define         EUC_INPUT      16
223 #define         SJIS_INPUT      5
224 #define         LATIN1_INPUT    6
225 #define         FIXED_MIME      7
226 #define         STRICT_MIME     8
227
228 /* MIME ENCODE */
229
230 #define         ISO2022JP       9
231 #define         JAPANESE_EUC   10
232 #define         SHIFT_JIS      11
233
234 #define         UTF8           12
235 #define         UTF8_INPUT     13
236 #define         UTF16_INPUT    1015
237 #define         UTF32_INPUT    1017
238
239 /* byte order */
240
241 #define         ENDIAN_BIG      1234
242 #define         ENDIAN_LITTLE   4321
243 #define         ENDIAN_2143     2143
244 #define         ENDIAN_3412     3412
245
246 #define         WISH_TRUE      15
247
248 /* ASCII CODE */
249
250 #define         BS      0x08
251 #define         TAB     0x09
252 #define         NL      0x0a
253 #define         CR      0x0d
254 #define         ESC     0x1b
255 #define         SPACE   0x20
256 #define         AT      0x40
257 #define         SSP     0xa0
258 #define         DEL     0x7f
259 #define         SI      0x0f
260 #define         SO      0x0e
261 #define         SSO     0x8e
262 #define         SS3     0x8f
263
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. nkf_toupper(ch & 0x7f)) is classified as a whole instead of being
 * re-associated by operator precedence.  The argument is still evaluated
 * more than once, so do not pass expressions with side effects.
 *
 * nkf_isblank / nkf_isspace rely on SPACE, TAB, CR and NL, and is_eucg3 on
 * SS3, all defined with the other ASCII code constants of this file.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* True when the byte above the low 8 bits of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
282
/*
 * Byte ranges used by the CP932 handling.
 * is_ibmext_in_sjis(c2) is true when c2 lies in
 * [CP932_TABLE_BEGIN, CP932_TABLE_END] (0xFA..0xFC).
 * The parameter is parenthesized so expression arguments are compared as a
 * whole; it is evaluated twice, so avoid arguments with side effects.
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
288
289 #define         HOLD_SIZE       1024
290 #if defined(INT_IS_SHORT)
291 #define         IOBUF_SIZE      2048
292 #else
293 #define         IOBUF_SIZE      16384
294 #endif
295
296 #define         DEFAULT_J       'B'
297 #define         DEFAULT_R       'B'
298
299 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
300 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
301
302 #define         RANGE_NUM_MAX   18
303 #define         GETA1   0x22
304 #define         GETA2   0x2e
305
306
307 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
308 #define sizeof_euc_to_utf8_1byte 94
309 #define sizeof_euc_to_utf8_2bytes 94
310 #define sizeof_utf8_to_euc_C2 64
311 #define sizeof_utf8_to_euc_E5B8 64
312 #define sizeof_utf8_to_euc_2bytes 112
313 #define sizeof_utf8_to_euc_3bytes 16
314 #endif
315
316 /* MIME preprocessor */
317
318 #ifdef EASYWIN /*Easy Win */
319 extern POINT _BufferSize;
320 #endif
321
/*
 * Describes one candidate input encoding; input_code_list is an array of
 * these.  The members are initialized positionally there, so their order
 * must not change.
 * NOTE(review): the precise meaning of stat/score/index/buf is defined by
 * the status functions, which are not visible here -- confirm before
 * relying on the hedged field notes below.
 */
322 struct input_code{
323     char *name;                 /* encoding name, e.g. "EUC-JP" */
324     nkf_char stat;              /* presumably the current detection state -- confirm */
325     nkf_char score;             /* presumably updated via set_code_score()/clr_code_score() -- confirm */
326     nkf_char index;             /* presumably the fill level of buf -- confirm */
327     nkf_char buf[3];            /* storage for three nkf_char values */
328     void (*status_func)(struct input_code *, nkf_char);  /* per-encoding status callback; NULL for some list entries */
329     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);  /* input converter associated with this encoding */
330     int _file_stat;
331 };
332
333 static char *input_codename = "";
334
335 #ifndef PERL_XS
336 static const char *CopyRight = COPY_RIGHT;
337 #endif
338 #if !defined(PERL_XS) && !defined(WIN32DLL)
339 static  nkf_char     noconvert(FILE *f);
340 #endif
341 static  void    module_connection(void);
342 static  nkf_char     kanji_convert(FILE *f);
343 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
344 static  nkf_char     push_hold_buf(nkf_char c2);
345 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
346 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
347 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
348 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
349 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
350 /* UCS Mapping
351  * 0: Shift_JIS, eucJP-ascii
352  * 1: eucJP-ms
353  * 2: CP932, CP51932
354  */
355 #define UCS_MAP_ASCII 0
356 #define UCS_MAP_MS    1
357 #define UCS_MAP_CP932 2
358 static int ms_ucs_map_f = UCS_MAP_ASCII;
359 #endif
360 #ifdef UTF8_INPUT_ENABLE
361 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
362 static  int     no_cp932ext_f = FALSE;
363 /* ignore ZERO WIDTH NO-BREAK SPACE */
364 static  int     no_best_fit_chars_f = FALSE;
365 static  int     input_endian = ENDIAN_BIG;
366 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
367 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
368 static  void    encode_fallback_html(nkf_char c);
369 static  void    encode_fallback_xml(nkf_char c);
370 static  void    encode_fallback_java(nkf_char c);
371 static  void    encode_fallback_perl(nkf_char c);
372 static  void    encode_fallback_subchar(nkf_char c);
373 static  void    (*encode_fallback)(nkf_char c) = NULL;
374 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
375 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
376 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
377 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
378 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
379 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
380 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
381 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
382 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
383 static  void    w_status(struct input_code *, nkf_char);
384 #endif
385 #ifdef UTF8_OUTPUT_ENABLE
386 static  int     output_bom_f = FALSE;
387 static  int     output_endian = ENDIAN_BIG;
388 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
389 static  void    w_oconv(nkf_char c2,nkf_char c1);
390 static  void    w_oconv16(nkf_char c2,nkf_char c1);
391 static  void    w_oconv32(nkf_char c2,nkf_char c1);
392 #endif
393 static  void    e_oconv(nkf_char c2,nkf_char c1);
394 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
395 static  void    s_oconv(nkf_char c2,nkf_char c1);
396 static  void    j_oconv(nkf_char c2,nkf_char c1);
397 static  void    fold_conv(nkf_char c2,nkf_char c1);
398 static  void    cr_conv(nkf_char c2,nkf_char c1);
399 static  void    z_conv(nkf_char c2,nkf_char c1);
400 static  void    rot_conv(nkf_char c2,nkf_char c1);
401 static  void    hira_conv(nkf_char c2,nkf_char c1);
402 static  void    base64_conv(nkf_char c2,nkf_char c1);
403 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
404 static  void    no_connection(nkf_char c2,nkf_char c1);
405 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
406
407 static  void    code_score(struct input_code *ptr);
408 static  void    code_status(nkf_char c);
409
410 static  void    std_putc(nkf_char c);
411 static  nkf_char     std_getc(FILE *f);
412 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
413
414 static  nkf_char     broken_getc(FILE *f);
415 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
416
417 static  nkf_char     mime_begin(FILE *f);
418 static  nkf_char     mime_getc(FILE *f);
419 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
420
421 static  void    switch_mime_getc(void);
422 static  void    unswitch_mime_getc(void);
423 static  nkf_char     mime_begin_strict(FILE *f);
424 static  nkf_char     mime_getc_buf(FILE *f);
425 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
426 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
427
428 static  nkf_char     base64decode(nkf_char c);
429 static  void    mime_prechar(nkf_char c2, nkf_char c1);
430 static  void    mime_putc(nkf_char c);
431 static  void    open_mime(nkf_char c);
432 static  void    close_mime(void);
433 static  void    eof_mime(void);
434 static  void    mimeout_addchar(nkf_char c);
435 #ifndef PERL_XS
436 static  void    usage(void);
437 static  void    version(void);
438 #endif
439 static  void    options(unsigned char *c);
440 #if defined(PERL_XS) || defined(WIN32DLL)
441 static  void    reinit(void);
442 #endif
443
444 /* buffers */
445
446 #if !defined(PERL_XS) && !defined(WIN32DLL)
447 static unsigned char   stdibuf[IOBUF_SIZE];
448 static unsigned char   stdobuf[IOBUF_SIZE];
449 #endif
450 static unsigned char   hold_buf[HOLD_SIZE*2];
451 static int             hold_count = 0;
452
453 /* MIME preprocessor fifo */
454
455 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
456 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)   
457 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
458 static unsigned char           mime_buf[MIME_BUF_SIZE];
459 static unsigned int            mime_top = 0;
460 static unsigned int            mime_last = 0;  /* decoded */
461 static unsigned int            mime_input = 0; /* undecoded */
462 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
463
464 /* flags */
/* Option flags; main() consults several of them after option parsing. */
465 static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
466 static int             estab_f = FALSE;
467 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
468 static int             binmode_f = TRUE;       /* binary mode */
469 static int             rot_f = FALSE;          /* ROT13/47 mode */
470 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
471 static int             input_f = FALSE;        /* non fixed input code  */
472 static int             alpha_f = FALSE;        /* convert JIS X0208 alphabet to ASCII */
473 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
474 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
475 static int             mimebuf_f = FALSE;      /* MIME buffered input */
476 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
477 static int             iso8859_f = FALSE;      /* ISO8859 through */
478 static int             mimeout_f = FALSE;       /* base64 mode */
/* Default for x0201_f depends on the platform; main() later resolves the
 * value WISH_TRUE to TRUE or NO_X0201 depending on iso2022jp_f. */
479 #if defined(MSDOS) || defined(__OS2__) 
480 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
481 #else
482 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
483 #endif
484 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
485
486 #ifdef UNICODE_NORMALIZATION
487 static int nfc_f = FALSE;
488 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
489 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
490 static nkf_char nfc_getc(FILE *f);
491 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
492 #endif
493
494 #ifdef INPUT_OPTION
495 static int cap_f = FALSE;
496 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
497 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
498 static nkf_char cap_getc(FILE *f);
499 static nkf_char cap_ungetc(nkf_char c,FILE *f);
500
501 static int url_f = FALSE;
502 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
503 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
504 static nkf_char url_getc(FILE *f);
505 static nkf_char url_ungetc(nkf_char c,FILE *f);
506 #endif
507
/*
 * 32-bit constants and helpers for code points carried inside an nkf_char.
 *
 * NKF_INT32_C(n) appends an L suffix when INT_IS_SHORT is defined, so the
 * constants below are long on such targets.
 *
 * A value whose top byte (CLASS_MASK) equals CLASS_UNICODE is treated as a
 * "Unicode capsule"; VALUE_MASK extracts its low 24 bits.
 *   is_unicode_capsule(c) - class byte of c is CLASS_UNICODE
 *   is_unicode_bmp(c)     - low 24 bits of c are <= 0xFFFF
 * The parameter is parenthesized so that expression arguments such as
 * (CLASS_UNICODE | code) are masked as a whole.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
520
521 #ifdef NUMCHAR_OPTION
522 static int numchar_f = FALSE;
523 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
524 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
525 static nkf_char numchar_getc(FILE *f);
526 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
527 #endif
528
529 #ifdef CHECK_OPTION
530 static int noout_f = FALSE;
531 static void no_putc(nkf_char c);
532 static nkf_char debug_f = FALSE;
533 static void debug(const char *str);
534 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
535 #endif
536
537 static int guess_f = FALSE;
538 #if !defined PERL_XS
539 static  void    print_guessed_code(char *filename);
540 #endif
541 static  void    set_input_codename(char *codename);
542 static int is_inputcode_mixed = FALSE;
543 static int is_inputcode_set   = FALSE;
544
545 #ifdef EXEC_IO
546 static int exec_f = 0;
547 #endif
548
549 #ifdef SHIFTJIS_CP932
550 /* invert IBM extended characters to others */
551 static int cp51932_f = TRUE;
552
553 /* invert NEC-selected IBM extended characters to IBM extended characters */
554 static int cp932inv_f = TRUE;
555
556 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
557 #endif /* SHIFTJIS_CP932 */
558
559 #ifdef X0212_ENABLE
560 static int x0212_f = FALSE;
561 static nkf_char x0212_shift(nkf_char c);
562 static nkf_char x0212_unshift(nkf_char c);
563 #endif
564 static int x0213_f = FALSE;
565
566 static unsigned char prefix_table[256];
567
568 static void set_code_score(struct input_code *ptr, nkf_char score);
569 static void clr_code_score(struct input_code *ptr, nkf_char score);
570 static void status_disable(struct input_code *ptr);
571 static void status_push_ch(struct input_code *ptr, nkf_char c);
572 static void status_clear(struct input_code *ptr);
573 static void status_reset(struct input_code *ptr);
574 static void status_reinit(struct input_code *ptr);
575 static void status_check(struct input_code *ptr, nkf_char c);
576 static void e_status(struct input_code *, nkf_char);
577 static void s_status(struct input_code *, nkf_char);
578
/*
 * Table of known input encodings.  Each row is initialized positionally as
 * { name, stat, score, index, buf, status_func, iconv_func, _file_stat }.
 * The UTF-8/16/32 rows exist only when UTF8_INPUT_ENABLE is defined, and
 * the UTF-16 and UTF-32 rows have no status function (NULL).
 * The final {0} row is all zero (name == NULL); presumably it is the
 * end-of-list sentinel -- confirm against the loops that walk this table.
 */
579 struct input_code input_code_list[] = {
580     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
581     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
582 #ifdef UTF8_INPUT_ENABLE
583     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
584     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
585     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
586 #endif
587     {0}
588 };
589
590 static int              mimeout_mode = 0;
591 static int              base64_count = 0;
592
593 /* X0208 -> ASCII converter */
594
595 /* fold parameter */
596 static int             f_line = 0;    /* chars in line */
597 static int             f_prev = 0;
598 static int             fold_preserve_f = FALSE; /* preserve new lines */
599 static int             fold_f  = FALSE;
600 static int             fold_len  = 0;
601
602 /* options */
603 static unsigned char   kanji_intro = DEFAULT_J;
604 static unsigned char   ascii_intro = DEFAULT_R;
605
606 /* Folding */
607
608 #define FOLD_MARGIN  10
609 #define DEFAULT_FOLD 60
610
611 static int             fold_margin  = FOLD_MARGIN;
612
613 /* converters */
614
/*
 * Select the default output converter from the DEFAULT_CODE_* build macro:
 *   DEFAULT_CODE_JIS  -> j_oconv     DEFAULT_CODE_SJIS -> s_oconv
 *   DEFAULT_CODE_EUC  -> e_oconv     DEFAULT_CODE_UTF8 -> w_oconv
 * The checks are independent #ifdefs, so if more than one macro is defined
 * DEFAULT_CONV is redefined by each later match; if none is defined,
 * DEFAULT_CONV stays undefined and the initializer of output_conv below
 * does not compile.  w_oconv is only declared when UTF8_OUTPUT_ENABLE is
 * defined.
 * NOTE(review): the DEFAULT_CODE_* macro presumably comes from config.h --
 * confirm.
 */
615 #ifdef DEFAULT_CODE_JIS
616 #   define  DEFAULT_CONV j_oconv
617 #endif
618 #ifdef DEFAULT_CODE_SJIS
619 #   define  DEFAULT_CONV s_oconv
620 #endif
621 #ifdef DEFAULT_CODE_EUC
622 #   define  DEFAULT_CONV e_oconv
623 #endif
624 #ifdef DEFAULT_CODE_UTF8
625 #   define  DEFAULT_CONV w_oconv
626 #endif
627
628 /* process default */
629 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
630
631 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
632 /* s_iconv or oconv */
633 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
634
635 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
636 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
637 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
638 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
639 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
640 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
641 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
642
643 /* static redirections */
644
645 static  void   (*o_putc)(nkf_char c) = std_putc;
646
647 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
648 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
649
650 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
651 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
652
653 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
654
655 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
656 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
657
658 /* for strict mime */
659 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
660 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
661
662 /* Global states */
663 static int output_mode = ASCII,    /* output kanji mode */
664            input_mode =  ASCII,    /* input kanji mode */
665            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
666 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
667
668 /* X0201 / X0208 conversion tables */
669
670 /* X0201 kana conversion table */
671 /* 90-9F A0-DF */
/*
 * 130 bytes = 65 two-byte entries; the first byte of every non-zero entry
 * is 0x21 or 0x25, and the last entry is 0x00,0x00.
 * NOTE(review): presumably each entry is the two-byte JIS X0208 code for
 * one X0201 kana, indexed by 2 * (kana offset) -- confirm at the use site.
 */
672 static const
673 unsigned char cv[]= {
674     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
675     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
676     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
677     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
678     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
679     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
680     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
681     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
682     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
683     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
684     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
685     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
686     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
687     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
688     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
689     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
690     0x00,0x00};
691
692
693 /* X0201 kana conversion table for dakuten (voiced sound mark) */
694 /* 90-9F A0-DF */
/*
 * Same layout as cv: 130 bytes = 65 two-byte entries.  Most entries are
 * 0x00,0x00.
 * NOTE(review): a zero entry presumably means the kana has no voiced
 * form -- confirm at the use site.
 */
695 static const
696 unsigned char dv[]= { 
697     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
698     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
699     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
700     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
701     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
702     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
703     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
704     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
705     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
706     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
707     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
708     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
709     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
710     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
711     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713     0x00,0x00};
714
715 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
716 /* 90-9F A0-DF */
/*
 * Same layout as cv and dv: 130 bytes = 65 two-byte entries.  Only five
 * entries are non-zero (0x25,0x51 0x25,0x54 0x25,0x57 0x25,0x5a 0x25,0x5d).
 * NOTE(review): a zero entry presumably means the kana has no semi-voiced
 * form -- confirm at the use site.
 */
717 static const
718 unsigned char ev[]= { 
719     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
720     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
724     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
725     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
730     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
731     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
732     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
733     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
734     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
735     0x00,0x00};
736
737
738 /* X0208 kigou (symbol) conversion table */
739 /* 0x8140 - 0x819e */
/*
 * 96 single-byte entries; non-zero values are ASCII punctuation codes
 * (e.g. 0x2c ',', 0x2e '.', 0x3a ':').
 * NOTE(review): a 0x00 entry presumably means "no ASCII equivalent", and
 * the index is presumably the offset from the start of the range named
 * above -- confirm at the use site.
 */
740 static const
741 unsigned char fv[] = {
742
743     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
744     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
745     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
746     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
747     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
748     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
749     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
750     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
751     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
752     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
753     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
754     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
755 } ;
756
757
758 #define    CRLF      1
759
760 static int             file_out_f = FALSE;
761 #ifdef OVERWRITE
762 static int             overwrite_f = FALSE;
763 static int             preserve_time_f = FALSE;
764 static int             backup_f = FALSE;
765 static char            *backup_suffix = "";
766 static char *get_backup_filename(const char *suffix, const char *filename);
767 #endif
768
769 static int             crmode_f = 0;   /* CR, NL, CRLF */
770 #ifdef EASYWIN /*Easy Win */
771 static int             end_check;
772 #endif /*Easy Win */
773
774 #define STD_GC_BUFSIZE (256)
775 nkf_char std_gc_buf[STD_GC_BUFSIZE];
776 nkf_char std_gc_ndx;
777
778 #ifdef WIN32DLL
779 #include "nkf32dll.c"
780 #elif defined(PERL_XS)
781 #else /* WIN32DLL */
782 int main(int argc, char **argv)
783 {
784     FILE  *fin;
785     unsigned char  *cp;
786
787     char *outfname = NULL;
788     char *origfname;
789
790 #ifdef EASYWIN /*Easy Win */
791     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
792 #endif
793
794     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
795         cp = (unsigned char *)*argv;
796         options(cp);
797 #ifdef EXEC_IO
798         if (exec_f){
799             int fds[2], pid;
800             if (pipe(fds) < 0 || (pid = fork()) < 0){
801                 abort();
802             }
803             if (pid == 0){
804                 if (exec_f > 0){
805                     close(fds[0]);
806                     dup2(fds[1], 1);
807                 }else{
808                     close(fds[1]);
809                     dup2(fds[0], 0);
810                 }
811                 execvp(argv[1], &argv[1]);
812             }
813             if (exec_f > 0){
814                 close(fds[1]);
815                 dup2(fds[0], 0);
816             }else{
817                 close(fds[0]);
818                 dup2(fds[1], 1);
819             }
820             argc = 0;
821             break;
822         }
823 #endif
824     }
825     if(x0201_f == WISH_TRUE)
826          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
827
828     if (binmode_f == TRUE)
829 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
830     if (freopen("","wb",stdout) == NULL) 
831         return (-1);
832 #else
833     setbinmode(stdout);
834 #endif
835
836     if (unbuf_f)
837       setbuf(stdout, (char *) NULL);
838     else
839       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
840
841     if (argc == 0) {
842       if (binmode_f == TRUE)
843 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
844       if (freopen("","rb",stdin) == NULL) return (-1);
845 #else
846       setbinmode(stdin);
847 #endif
848       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
849       if (nop_f)
850           noconvert(stdin);
851       else {
852           kanji_convert(stdin);
853           if (guess_f) print_guessed_code(NULL);
854       }
855     } else {
856       int nfiles = argc;
857         int is_argument_error = FALSE;
858       while (argc--) {
859             is_inputcode_mixed = FALSE;
860             is_inputcode_set   = FALSE;
861             input_codename = "";
862 #ifdef CHECK_OPTION
863             iconv_for_check = 0;
864 #endif
865           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
866               perror(*--argv);
867                 *argv++;
868                 is_argument_error = TRUE;
869                 continue;
870           } else {
871 #ifdef OVERWRITE
872               int fd = 0;
873               int fd_backup = 0;
874 #endif
875
876 /* reopen file for stdout */
877               if (file_out_f == TRUE) {
878 #ifdef OVERWRITE
879                   if (overwrite_f){
880                       outfname = malloc(strlen(origfname)
881                                         + strlen(".nkftmpXXXXXX")
882                                         + 1);
883                       if (!outfname){
884                           perror(origfname);
885                           return -1;
886                       }
887                       strcpy(outfname, origfname);
888 #ifdef MSDOS
889                       {
890                           int i;
891                           for (i = strlen(outfname); i; --i){
892                               if (outfname[i - 1] == '/'
893                                   || outfname[i - 1] == '\\'){
894                                   break;
895                               }
896                           }
897                           outfname[i] = '\0';
898                       }
899                       strcat(outfname, "ntXXXXXX");
900                       mktemp(outfname);
901                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
902                                 S_IREAD | S_IWRITE);
903 #else
904                       strcat(outfname, ".nkftmpXXXXXX");
905                       fd = mkstemp(outfname);
906 #endif
907                       if (fd < 0
908                           || (fd_backup = dup(fileno(stdout))) < 0
909                           || dup2(fd, fileno(stdout)) < 0
910                           ){
911                           perror(origfname);
912                           return -1;
913                       }
914                   }else
915 #endif
916                   if(argc == 1 ) {
917                       outfname = *argv++;
918                       argc--;
919                   } else {
920                       outfname = "nkf.out";
921                   }
922
923                   if(freopen(outfname, "w", stdout) == NULL) {
924                       perror (outfname);
925                       return (-1);
926                   }
927                   if (binmode_f == TRUE) {
928 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
929                       if (freopen("","wb",stdout) == NULL) 
930                            return (-1);
931 #else
932                       setbinmode(stdout);
933 #endif
934                   }
935               }
936               if (binmode_f == TRUE)
937 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
938                  if (freopen("","rb",fin) == NULL) 
939                     return (-1);
940 #else
941                  setbinmode(fin);
942 #endif 
943               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
944               if (nop_f)
945                   noconvert(fin);
946               else {
947                   char *filename = NULL;
948                   kanji_convert(fin);
949                   if (nfiles > 1) filename = origfname;
950                   if (guess_f) print_guessed_code(filename);
951               }
952               fclose(fin);
953 #ifdef OVERWRITE
954               if (overwrite_f) {
955                   struct stat     sb;
956 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
957                   time_t tb[2];
958 #else
959                   struct utimbuf  tb;
960 #endif
961
962                   fflush(stdout);
963                   close(fd);
964                   if (dup2(fd_backup, fileno(stdout)) < 0){
965                       perror("dup2");
966                   }
967                   if (stat(origfname, &sb)) {
968                       fprintf(stderr, "Can't stat %s\n", origfname);
969                   }
970                   /* Restore the original file's permissions */
971                   if (chmod(outfname, sb.st_mode)) {
972                       fprintf(stderr, "Can't set permission %s\n", outfname);
973                   }
974
975                   /* Restore the original file's timestamp */
976                     if(preserve_time_f){
977 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
978                         tb[0] = tb[1] = sb.st_mtime;
979                         if (utime(outfname, tb)) {
980                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
981                         }
982 #else
983                         tb.actime  = sb.st_atime;
984                         tb.modtime = sb.st_mtime;
985                         if (utime(outfname, &tb)) {
986                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
987                         }
988 #endif
989                     }
990                     if(backup_f){
991                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
992 #ifdef MSDOS
993                         unlink(backup_filename);
994 #endif
995                         if (rename(origfname, backup_filename)) {
996                             perror(backup_filename);
997                             fprintf(stderr, "Can't rename %s to %s\n",
998                                     origfname, backup_filename);
999                         }
1000                     }else{
1001 #ifdef MSDOS
1002                         if (unlink(origfname)){
1003                             perror(origfname);
1004                         }
1005 #endif
1006                     }
1007                   if (rename(outfname, origfname)) {
1008                       perror(origfname);
1009                       fprintf(stderr, "Can't rename %s to %s\n",
1010                               outfname, origfname);
1011                   }
1012                   free(outfname);
1013               }
1014 #endif
1015           }
1016       }
1017         if (is_argument_error)
1018             return(-1);
1019     }
1020 #ifdef EASYWIN /*Easy Win */
1021     if (file_out_f == FALSE) 
1022         scanf("%d",&end_check);
1023     else 
1024         fclose(stdout);
1025 #else /* for Other OS */
1026     if (file_out_f == TRUE) 
1027         fclose(stdout);
1028 #endif /*Easy Win */
1029     return (0);
1030 }
1031 #endif /* WIN32DLL */
1032
1033 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * suffix   : backup suffix or template.  If it contains no '*', the
 *            result is `filename` followed by `suffix`.  Otherwise it is
 *            treated as a template and every '*' in it is replaced by
 *            `filename`.
 * filename : name of the file being backed up.
 *
 * Returns a malloc()ed, NUL-terminated string owned by the caller, or
 * NULL (after reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++){
        if (suffix[i] == '*') asterisk_count++;
    }

    /*
     * Each '*' (one byte of the template) expands to filename_length
     * bytes; without any '*' the suffix is appended to one copy of the
     * file name.  The extra byte holds the terminating NUL.
     */
    if (asterisk_count){
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
    }else{
        backup_filename = malloc(filename_length + suffix_length + 1);
    }
    if (!backup_filename){
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count){
        for (i = 0, j = 0; i < suffix_length; i++){
            if (suffix[i] == '*'){
                /* copy at the current write position instead of
                   rescanning the buffer from its start */
                strcpy(backup_filename + j, filename);
                j += filename_length;
            }else{
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    }else{
        strcpy(backup_filename, filename);
        strcpy(backup_filename + filename_length, suffix);
    }
    return backup_filename;
}
1072 #endif
1073
/*
 * Table of long options, consulted by options() for arguments of the
 * form "--name".
 *
 * name  : long option name without the leading "--".  A name ending in
 *         '=' takes a value; options() reads it from the text that
 *         follows the '=' in the argument.
 * alias : string of short option letters that options() processes in
 *         place of the long option.  An empty string means the entry has
 *         no short equivalent and options() handles it itself by
 *         comparing `name`.
 *
 * options() scans the table from the top and stops at the first entry
 * that matches, so the order of the entries is significant.
 *
 * NOTE(review): the "fb-*" entries are compiled in under
 * UTF8_OUTPUT_ENABLE and "no-cp932ext"/"no-best-fit-chars" under
 * UTF8_INPUT_ENABLE, but options() only handles them when both macros
 * are defined; with just one defined they are accepted and ignored.
 */
1074 static const
1075 struct {
1076     const char *name;
1077     const char *alias;
1078 } long_option[] = {
1079     {"ic=", ""},   /* value is an input codeset name */
1080     {"oc=", ""},   /* value is an output codeset name */
1081     {"base64","jMB"},
1082     {"euc","e"},
1083     {"euc-input","E"},
1084     {"fj","jm"},
1085     {"help","v"},
1086     {"jis","j"},
1087     {"jis-input","J"},
1088     {"mac","sLm"},
1089     {"mime","jM"},
1090     {"mime-input","m"},
1091     {"msdos","sLw"},
1092     {"sjis","s"},
1093     {"sjis-input","S"},
1094     {"unix","eLu"},
1095     {"version","V"},
1096     {"windows","sLw"},
1097     {"hiragana","h1"},
1098     {"katakana","h2"},
1099     {"katakana-hiragana","h3"},
1100     {"guess", "g"},
1101     {"cp932", ""},
1102     {"no-cp932", ""},
1103 #ifdef X0212_ENABLE
1104     {"x0212", ""},
1105 #endif
1106 #ifdef UTF8_OUTPUT_ENABLE
1107     {"utf8", "w"},
1108     {"utf16", "w16"},
1109     {"ms-ucs-map", ""},
1110     {"fb-skip", ""},
1111     {"fb-html", ""},
1112     {"fb-xml", ""},
1113     {"fb-perl", ""},
1114     {"fb-java", ""},
1115     {"fb-subchar", ""},
1116     {"fb-subchar=", ""},
1117 #endif
1118 #ifdef UTF8_INPUT_ENABLE
1119     {"utf8-input", "W"},
1120     {"utf16-input", "W16"},
1121     {"no-cp932ext", ""},
1122     {"no-best-fit-chars",""},
1123 #endif
1124 #ifdef UNICODE_NORMALIZATION
1125     {"utf8mac-input", ""},
1126 #endif
1127 #ifdef OVERWRITE
1128     {"overwrite", ""},
1129     {"overwrite=", ""},   /* value is the backup suffix */
1130     {"in-place", ""},
1131     {"in-place=", ""},    /* value is the backup suffix */
1132 #endif
1133 #ifdef INPUT_OPTION
1134     {"cap-input", ""},
1135     {"url-input", ""},
1136 #endif
1137 #ifdef NUMCHAR_OPTION
1138     {"numchar-input", ""},
1139 #endif
1140 #ifdef CHECK_OPTION
1141     {"no-output", ""},
1142     {"debug", ""},
1143 #endif
1144 #ifdef SHIFTJIS_CP932
1145     {"cp932inv", ""},
1146 #endif
1147 #ifdef EXEC_IO
1148     {"exec-in", ""},
1149     {"exec-out", ""},
1150 #endif
1151     {"prefix=", ""},
1152 };
1153
/*
 * Option-parsing state shared between calls to options().  It is set to
 * 1 when options() meets a bare "--" (nothing, or a space, after it);
 * every later call to options() then returns at once without parsing.
 */
1154 static int option_mode = 0;
1155
1156 void options(unsigned char *cp)
1157 {
1158     nkf_char i, j;
1159     unsigned char *p;
1160     unsigned char *cp_back = NULL;
1161     char codeset[32];
1162
1163     if (option_mode==1)
1164         return;
1165     while(*cp && *cp++!='-');
1166     while (*cp || cp_back) {
1167         if(!*cp){
1168             cp = cp_back;
1169             cp_back = NULL;
1170             continue;
1171         }
1172         p = 0;
1173         switch (*cp++) {
1174         case '-':  /* literal options */
1175             if (!*cp || *cp == SPACE) {        /* ignore the rest of arguments */
1176                 option_mode = 1;
1177                 return;
1178             }
1179             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1180                 p = (unsigned char *)long_option[i].name;
1181                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1182                 if (*p == cp[j] || cp[j] == ' '){
1183                     p = &cp[j] + 1;
1184                     break;
1185                 }
1186                 p = 0;
1187             }
1188             if (p == 0) return;
1189             while(*cp && *cp != SPACE && cp++);
1190             if (long_option[i].alias[0]){
1191                 cp_back = cp;
1192                 cp = (unsigned char *)long_option[i].alias;
1193             }else{
1194                 if (strcmp(long_option[i].name, "ic=") == 0){
1195                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1196                         codeset[i] = nkf_toupper(p[i]);
1197                     }
1198                     codeset[i] = 0;
1199                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1200                         input_f = JIS_INPUT;
1201                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1202                       strcmp(codeset, "CP50220") == 0 ||
1203                       strcmp(codeset, "CP50221") == 0 ||
1204                       strcmp(codeset, "CP50222") == 0){
1205                         input_f = JIS_INPUT;
1206 #ifdef SHIFTJIS_CP932
1207                         cp51932_f = TRUE;
1208 #endif
1209 #ifdef UTF8_OUTPUT_ENABLE
1210                         ms_ucs_map_f = UCS_MAP_CP932;
1211 #endif
1212                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1213                         input_f = JIS_INPUT;
1214 #ifdef X0212_ENABLE
1215                         x0212_f = TRUE;
1216 #endif
1217                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1218                         input_f = JIS_INPUT;
1219 #ifdef X0212_ENABLE
1220                         x0212_f = TRUE;
1221 #endif
1222                         x0213_f = TRUE;
1223                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1224                         input_f = SJIS_INPUT;
1225                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1226                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1227                              strcmp(codeset, "CP932") == 0 ||
1228                              strcmp(codeset, "MS932") == 0){
1229                         input_f = SJIS_INPUT;
1230 #ifdef SHIFTJIS_CP932
1231                         cp51932_f = TRUE;
1232 #endif
1233 #ifdef UTF8_OUTPUT_ENABLE
1234                         ms_ucs_map_f = UCS_MAP_CP932;
1235 #endif
1236                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1237                              strcmp(codeset, "EUC-JP") == 0){
1238                         input_f = EUC_INPUT;
1239                     }else if(strcmp(codeset, "CP51932") == 0){
1240                         input_f = EUC_INPUT;
1241 #ifdef SHIFTJIS_CP932
1242                         cp51932_f = TRUE;
1243 #endif
1244 #ifdef UTF8_OUTPUT_ENABLE
1245                         ms_ucs_map_f = UCS_MAP_CP932;
1246 #endif
1247                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1248                              strcmp(codeset, "EUCJP-MS") == 0 ||
1249                              strcmp(codeset, "EUCJPMS") == 0){
1250                         input_f = EUC_INPUT;
1251 #ifdef SHIFTJIS_CP932
1252                         cp51932_f = FALSE;
1253 #endif
1254 #ifdef UTF8_OUTPUT_ENABLE
1255                         ms_ucs_map_f = UCS_MAP_MS;
1256 #endif
1257                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1258                              strcmp(codeset, "EUCJP-ASCII") == 0){
1259                         input_f = EUC_INPUT;
1260 #ifdef SHIFTJIS_CP932
1261                         cp51932_f = FALSE;
1262 #endif
1263 #ifdef UTF8_OUTPUT_ENABLE
1264                         ms_ucs_map_f = UCS_MAP_ASCII;
1265 #endif
1266                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1267                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1268                         input_f = SJIS_INPUT;
1269                         x0213_f = TRUE;
1270 #ifdef SHIFTJIS_CP932
1271                         cp51932_f = FALSE;
1272                         cp932inv_f = FALSE;
1273 #endif
1274                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1275                              strcmp(codeset, "EUC-JIS-2004") == 0){
1276                         input_f = EUC_INPUT;
1277                         x0213_f = TRUE;
1278 #ifdef SHIFTJIS_CP932
1279                         cp51932_f = FALSE;
1280                         cp932inv_f = FALSE;
1281 #endif
1282 #ifdef UTF8_INPUT_ENABLE
1283                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1284                              strcmp(codeset, "UTF-8N") == 0 ||
1285                              strcmp(codeset, "UTF-8-BOM") == 0){
1286                         input_f = UTF8_INPUT;
1287 #ifdef UNICODE_NORMALIZATION
1288                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1289                              strcmp(codeset, "UTF-8-MAC") == 0){
1290                         input_f = UTF8_INPUT;
1291                         nfc_f = TRUE;
1292 #endif
1293                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1294                              strcmp(codeset, "UTF-16BE") == 0 ||
1295                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1296                         input_f = UTF16_INPUT;
1297                         input_endian = ENDIAN_BIG;
1298                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1299                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1300                         input_f = UTF16_INPUT;
1301                         input_endian = ENDIAN_LITTLE;
1302                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1303                              strcmp(codeset, "UTF-32BE") == 0 ||
1304                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1305                         input_f = UTF32_INPUT;
1306                         input_endian = ENDIAN_BIG;
1307                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1308                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1309                         input_f = UTF32_INPUT;
1310                         input_endian = ENDIAN_LITTLE;
1311 #endif
1312                     }
1313                     continue;
1314                 }
1315                 if (strcmp(long_option[i].name, "oc=") == 0){
1316                     x0201_f = FALSE;
1317                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1318                         codeset[i] = nkf_toupper(p[i]);
1319                     }
1320                     codeset[i] = 0;
1321                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1322                         output_conv = j_oconv;
1323                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1324                         output_conv = j_oconv;
1325                         no_cp932ext_f = TRUE;
1326 #ifdef SHIFTJIS_CP932
1327                         cp51932_f = TRUE;
1328 #endif
1329 #ifdef UTF8_OUTPUT_ENABLE
1330                         ms_ucs_map_f = UCS_MAP_CP932;
1331 #endif
1332                     }else if(strcmp(codeset, "CP50220") == 0){
1333                         output_conv = j_oconv;
1334                         x0201_f = TRUE;
1335 #ifdef SHIFTJIS_CP932
1336                         cp51932_f = TRUE;
1337 #endif
1338 #ifdef UTF8_OUTPUT_ENABLE
1339                         ms_ucs_map_f = UCS_MAP_CP932;
1340 #endif
1341                     }else if(strcmp(codeset, "CP50221") == 0){
1342                         output_conv = j_oconv;
1343 #ifdef SHIFTJIS_CP932
1344                         cp51932_f = TRUE;
1345 #endif
1346 #ifdef UTF8_OUTPUT_ENABLE
1347                         ms_ucs_map_f = UCS_MAP_CP932;
1348 #endif
1349                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1350                         output_conv = j_oconv;
1351 #ifdef X0212_ENABLE
1352                         x0212_f = TRUE;
1353 #endif
1354 #ifdef SHIFTJIS_CP932
1355                         cp51932_f = FALSE;
1356 #endif
1357                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1358                         output_conv = j_oconv;
1359 #ifdef X0212_ENABLE
1360                         x0212_f = TRUE;
1361 #endif
1362                         x0213_f = TRUE;
1363 #ifdef SHIFTJIS_CP932
1364                         cp51932_f = FALSE;
1365 #endif
1366                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1367                         output_conv = s_oconv;
1368                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1369                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1370                              strcmp(codeset, "CP932") == 0 ||
1371                              strcmp(codeset, "MS932") == 0){
1372                         output_conv = s_oconv;
1373 #ifdef SHIFTJIS_CP932
1374                         cp51932_f = TRUE;
1375                         cp932inv_f = TRUE;
1376 #endif
1377 #ifdef UTF8_OUTPUT_ENABLE
1378                         ms_ucs_map_f = UCS_MAP_CP932;
1379 #endif
1380                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1381                              strcmp(codeset, "EUC-JP") == 0){
1382                         output_conv = e_oconv;
1383                     }else if(strcmp(codeset, "CP51932") == 0){
1384                         output_conv = e_oconv;
1385 #ifdef SHIFTJIS_CP932
1386                         cp51932_f = TRUE;
1387 #endif
1388 #ifdef UTF8_OUTPUT_ENABLE
1389                         ms_ucs_map_f = UCS_MAP_CP932;
1390 #endif
1391                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1392                              strcmp(codeset, "EUCJP-MS") == 0 ||
1393                              strcmp(codeset, "EUCJPMS") == 0){
1394                         output_conv = e_oconv;
1395 #ifdef X0212_ENABLE
1396                         x0212_f = TRUE;
1397 #endif
1398 #ifdef SHIFTJIS_CP932
1399                         cp51932_f = FALSE;
1400 #endif
1401 #ifdef UTF8_OUTPUT_ENABLE
1402                         ms_ucs_map_f = UCS_MAP_MS;
1403 #endif
1404                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1405                              strcmp(codeset, "EUCJP-ASCII") == 0){
1406                         output_conv = e_oconv;
1407 #ifdef X0212_ENABLE
1408                         x0212_f = TRUE;
1409 #endif
1410 #ifdef SHIFTJIS_CP932
1411                         cp51932_f = FALSE;
1412 #endif
1413 #ifdef UTF8_OUTPUT_ENABLE
1414                         ms_ucs_map_f = UCS_MAP_ASCII;
1415 #endif
1416                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1417                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1418                         output_conv = s_oconv;
1419                         x0213_f = TRUE;
1420 #ifdef SHIFTJIS_CP932
1421                         cp932inv_f = FALSE;
1422 #endif
1423                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1424                              strcmp(codeset, "EUC-JIS-2004") == 0){
1425                         output_conv = e_oconv;
1426 #ifdef X0212_ENABLE
1427                         x0212_f = TRUE;
1428 #endif
1429                         x0213_f = TRUE;
1430 #ifdef SHIFTJIS_CP932
1431                         cp51932_f = FALSE;
1432 #endif
1433 #ifdef UTF8_OUTPUT_ENABLE
1434                     }else if(strcmp(codeset, "UTF-8") == 0){
1435                         output_conv = w_oconv;
1436                     }else if(strcmp(codeset, "UTF-8N") == 0){
1437                         output_conv = w_oconv;
1438                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1439                         output_conv = w_oconv;
1440                         output_bom_f = TRUE;
1441                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1442                         output_conv = w_oconv16;
1443                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1444                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1445                         output_conv = w_oconv16;
1446                         output_bom_f = TRUE;
1447                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1448                         output_conv = w_oconv16;
1449                         output_endian = ENDIAN_LITTLE;
1450                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1451                         output_conv = w_oconv16;
1452                         output_endian = ENDIAN_LITTLE;
1453                         output_bom_f = TRUE;
1454                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1455                              strcmp(codeset, "UTF-32BE") == 0){
1456                         output_conv = w_oconv32;
1457                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1458                         output_conv = w_oconv32;
1459                         output_bom_f = TRUE;
1460                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1461                         output_conv = w_oconv32;
1462                         output_endian = ENDIAN_LITTLE;
1463                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1464                         output_conv = w_oconv32;
1465                         output_endian = ENDIAN_LITTLE;
1466                         output_bom_f = TRUE;
1467 #endif
1468                     }
1469                     continue;
1470                 }
1471 #ifdef OVERWRITE
1472                 if (strcmp(long_option[i].name, "overwrite") == 0){
1473                     file_out_f = TRUE;
1474                     overwrite_f = TRUE;
1475                     preserve_time_f = TRUE;
1476                     continue;
1477                 }
1478                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1479                     file_out_f = TRUE;
1480                     overwrite_f = TRUE;
1481                     preserve_time_f = TRUE;
1482                     backup_f = TRUE;
1483                     backup_suffix = malloc(strlen((char *) p) + 1);
1484                     strcpy(backup_suffix, (char *) p);
1485                     continue;
1486                 }
1487                 if (strcmp(long_option[i].name, "in-place") == 0){
1488                     file_out_f = TRUE;
1489                     overwrite_f = TRUE;
1490                     preserve_time_f = FALSE;
1491                     continue;
1492                 }
1493                 if (strcmp(long_option[i].name, "in-place=") == 0){
1494                     file_out_f = TRUE;
1495                     overwrite_f = TRUE;
1496                     preserve_time_f = FALSE;
1497                     backup_f = TRUE;
1498                     backup_suffix = malloc(strlen((char *) p) + 1);
1499                     strcpy(backup_suffix, (char *) p);
1500                     continue;
1501                 }
1502 #endif
1503 #ifdef INPUT_OPTION
1504                 if (strcmp(long_option[i].name, "cap-input") == 0){
1505                     cap_f = TRUE;
1506                     continue;
1507                 }
1508                 if (strcmp(long_option[i].name, "url-input") == 0){
1509                     url_f = TRUE;
1510                     continue;
1511                 }
1512 #endif
1513 #ifdef NUMCHAR_OPTION
1514                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1515                     numchar_f = TRUE;
1516                     continue;
1517                 }
1518 #endif
1519 #ifdef CHECK_OPTION
1520                 if (strcmp(long_option[i].name, "no-output") == 0){
1521                     noout_f = TRUE;
1522                     continue;
1523                 }
1524                 if (strcmp(long_option[i].name, "debug") == 0){
1525                     debug_f = TRUE;
1526                     continue;
1527                 }
1528 #endif
1529                 if (strcmp(long_option[i].name, "cp932") == 0){
1530 #ifdef SHIFTJIS_CP932
1531                     cp51932_f = TRUE;
1532                     cp932inv_f = TRUE;
1533 #endif
1534 #ifdef UTF8_OUTPUT_ENABLE
1535                     ms_ucs_map_f = UCS_MAP_CP932;
1536 #endif
1537                     continue;
1538                 }
1539                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1540 #ifdef SHIFTJIS_CP932
1541                     cp51932_f = FALSE;
1542                     cp932inv_f = FALSE;
1543 #endif
1544 #ifdef UTF8_OUTPUT_ENABLE
1545                     ms_ucs_map_f = UCS_MAP_ASCII;
1546 #endif
1547                     continue;
1548                 }
1549 #ifdef SHIFTJIS_CP932
1550                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1551                     cp932inv_f = TRUE;
1552                     continue;
1553                 }
1554 #endif
1555
1556 #ifdef X0212_ENABLE
1557                 if (strcmp(long_option[i].name, "x0212") == 0){
1558                     x0212_f = TRUE;
1559                     continue;
1560                 }
1561 #endif
1562
1563 #ifdef EXEC_IO
1564                   if (strcmp(long_option[i].name, "exec-in") == 0){
1565                       exec_f = 1;
1566                       return;
1567                   }
1568                   if (strcmp(long_option[i].name, "exec-out") == 0){
1569                       exec_f = -1;
1570                       return;
1571                   }
1572 #endif
1573 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1574                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1575                     no_cp932ext_f = TRUE;
1576                     continue;
1577                 }
1578                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1579                     no_best_fit_chars_f = TRUE;
1580                     continue;
1581                 }
1582                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1583                     encode_fallback = NULL;
1584                     continue;
1585                 }
1586                 if (strcmp(long_option[i].name, "fb-html") == 0){
1587                     encode_fallback = encode_fallback_html;
1588                     continue;
1589                 }
1590                 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1591                     encode_fallback = encode_fallback_xml;
1592                     continue;
1593                 }
1594                 if (strcmp(long_option[i].name, "fb-java") == 0){
1595                     encode_fallback = encode_fallback_java;
1596                     continue;
1597                 }
1598                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1599                     encode_fallback = encode_fallback_perl;
1600                     continue;
1601                 }
1602                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1603                     encode_fallback = encode_fallback_subchar;
1604                     continue;
1605                 }
1606                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1607                     encode_fallback = encode_fallback_subchar;
1608                     unicode_subchar = 0;
1609                     if (p[0] != '0'){
1610                         /* decimal number */
1611                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1612                             unicode_subchar *= 10;
1613                             unicode_subchar += hex2bin(p[i]);
1614                         }
1615                     }else if(p[1] == 'x' || p[1] == 'X'){
1616                         /* hexadecimal number */
1617                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1618                             unicode_subchar <<= 4;
1619                             unicode_subchar |= hex2bin(p[i]);
1620                         }
1621                     }else{
1622                         /* octal number */
1623                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1624                             unicode_subchar *= 8;
1625                             unicode_subchar += hex2bin(p[i]);
1626                         }
1627                     }
1628                     w16e_conv(unicode_subchar, &i, &j);
1629                     unicode_subchar = i<<8 | j;
1630                     continue;
1631                 }
1632 #endif
1633 #ifdef UTF8_OUTPUT_ENABLE
1634                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1635                     ms_ucs_map_f = UCS_MAP_MS;
1636                     continue;
1637                 }
1638 #endif
1639 #ifdef UNICODE_NORMALIZATION
1640                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1641                     input_f = UTF8_INPUT;
1642                     nfc_f = TRUE;
1643                     continue;
1644                 }
1645 #endif
1646                 if (strcmp(long_option[i].name, "prefix=") == 0){
1647                     if (nkf_isgraph(p[0])){
1648                         for (i = 1; nkf_isgraph(p[i]); i++){
1649                             prefix_table[p[i]] = p[0];
1650                         }
1651                     }
1652                     continue;
1653                 }
1654             }
1655             continue;
1656         case 'b':           /* buffered mode */
1657             unbuf_f = FALSE;
1658             continue;
1659         case 'u':           /* non bufferd mode */
1660             unbuf_f = TRUE;
1661             continue;
1662         case 't':           /* transparent mode */
1663             if (*cp=='1') {
1664                 /* alias of -t */
1665                 nop_f = TRUE;
1666                 *cp++;
1667             } else if (*cp=='2') {
1668                 /*
1669                  * -t with put/get
1670                  *
1671                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1672                  *
1673                  */
1674                 nop_f = 2;
1675                 *cp++;
1676             } else
1677                 nop_f = TRUE;
1678             continue;
1679         case 'j':           /* JIS output */
1680         case 'n':
1681             output_conv = j_oconv;
1682             continue;
1683         case 'e':           /* AT&T EUC output */
1684             output_conv = e_oconv;
1685             continue;
1686         case 's':           /* SJIS output */
1687             output_conv = s_oconv;
1688             continue;
1689         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1690             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1691             input_f = LATIN1_INPUT;
1692             continue;
1693         case 'i':           /* Kanji IN ESC-$-@/B */
1694             if (*cp=='@'||*cp=='B') 
1695                 kanji_intro = *cp++;
1696             continue;
1697         case 'o':           /* ASCII IN ESC-(-J/B */
1698             if (*cp=='J'||*cp=='B'||*cp=='H') 
1699                 ascii_intro = *cp++;
1700             continue;
1701         case 'h':
1702             /*  
1703                 bit:1   katakana->hiragana
1704                 bit:2   hiragana->katakana
1705             */
1706             if ('9'>= *cp && *cp>='0') 
1707                 hira_f |= (*cp++ -'0');
1708             else 
1709                 hira_f |= 1;
1710             continue;
1711         case 'r':
1712             rot_f = TRUE;
1713             continue;
1714 #if defined(MSDOS) || defined(__OS2__) 
1715         case 'T':
1716             binmode_f = FALSE;
1717             continue;
1718 #endif
1719 #ifndef PERL_XS
1720         case 'V':
1721             version();
1722             exit(1);
1723             break;
1724         case 'v':
1725             usage();
1726             exit(1);
1727             break;
1728 #endif
1729 #ifdef UTF8_OUTPUT_ENABLE
1730         case 'w':           /* UTF-8 output */
1731             if (cp[0] == '8') {
1732                 output_conv = w_oconv; cp++;
1733                 if (cp[0] == '0'){
1734                     cp++;
1735                 } else {
1736                     output_bom_f = TRUE;
1737                 }
1738             } else {
1739                 if ('1'== cp[0] && '6'==cp[1]) {
1740                     output_conv = w_oconv16; cp+=2;
1741                 } else if ('3'== cp[0] && '2'==cp[1]) {
1742                     output_conv = w_oconv32; cp+=2;
1743                 } else {
1744                     output_conv = w_oconv;
1745                     continue;
1746                 }
1747                 if (cp[0]=='L') {
1748                     cp++;
1749                     output_endian = ENDIAN_LITTLE;
1750                 } else if (cp[0] == 'B') {
1751                     cp++;
1752                 } else {
1753                     continue;
1754                 }
1755                 if (cp[0] == '0'){
1756                     cp++;
1757                 } else {
1758                     output_bom_f = TRUE;
1759                 }
1760             }
1761             continue;
1762 #endif
1763 #ifdef UTF8_INPUT_ENABLE
1764         case 'W':           /* UTF input */
1765             if (cp[0] == '8') {
1766                 cp++;
1767                 input_f = UTF8_INPUT;
1768             }else{
1769                 if ('1'== cp[0] && '6'==cp[1]) {
1770                     cp += 2;
1771                     input_f = UTF16_INPUT;
1772                     input_endian = ENDIAN_BIG;
1773                 } else if ('3'== cp[0] && '2'==cp[1]) {
1774                     cp += 2;
1775                     input_f = UTF32_INPUT;
1776                     input_endian = ENDIAN_BIG;
1777                 } else {
1778                     input_f = UTF8_INPUT;
1779                     continue;
1780                 }
1781                 if (cp[0]=='L') {
1782                     cp++;
1783                     input_endian = ENDIAN_LITTLE;
1784                 } else if (cp[0] == 'B') {
1785                     cp++;
1786                 }
1787             }
1788             continue;
1789 #endif
1790         /* Input code assumption */
1791         case 'J':   /* JIS input */
1792             input_f = JIS_INPUT;
1793             continue;
1794         case 'E':   /* AT&T EUC input */
1795             input_f = EUC_INPUT;
1796             continue;
1797         case 'S':   /* MS Kanji input */
1798             input_f = SJIS_INPUT;
1799             if (x0201_f==NO_X0201) x0201_f=TRUE;
1800             continue;
1801         case 'Z':   /* Convert X0208 alphabet to asii */
1802             /*  bit:0   Convert X0208
1803                 bit:1   Convert Kankaku to one space
1804                 bit:2   Convert Kankaku to two spaces
1805                 bit:3   Convert HTML Entity
1806             */
1807             if ('9'>= *cp && *cp>='0') 
1808                 alpha_f |= 1<<(*cp++ -'0');
1809             else 
1810                 alpha_f |= TRUE;
1811             continue;
1812         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1813             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1814             /* accept  X0201
1815                     ESC-(-I     in JIS, EUC, MS Kanji
1816                     SI/SO       in JIS, EUC, MS Kanji
1817                     SSO         in EUC, JIS, not in MS Kanji
1818                     MS Kanji (0xa0-0xdf) 
1819                output  X0201
1820                     ESC-(-I     in JIS (0x20-0x5f)
1821                     SSO         in EUC (0xa0-0xdf)
1822                     0xa0-0xd    in MS Kanji (0xa0-0xdf) 
1823             */
1824             continue;
1825         case 'X':   /* Assume X0201 kana */
1826             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1827             x0201_f = TRUE;
1828             continue;
1829         case 'F':   /* prserve new lines */
1830             fold_preserve_f = TRUE;
1831         case 'f':   /* folding -f60 or -f */
1832             fold_f = TRUE;
1833             fold_len = 0;
1834             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1835                 fold_len *= 10;
1836                 fold_len += *cp++ - '0';
1837             }
1838             if (!(0<fold_len && fold_len<BUFSIZ)) 
1839                 fold_len = DEFAULT_FOLD;
1840             if (*cp=='-') {
1841                 fold_margin = 0;
1842                 cp++;
1843                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1844                     fold_margin *= 10;
1845                     fold_margin += *cp++ - '0';
1846                 }
1847             }
1848             continue;
1849         case 'm':   /* MIME support */
1850             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1851             if (*cp=='B'||*cp=='Q') {
1852                 mime_decode_mode = *cp++;
1853                 mimebuf_f = FIXED_MIME;
1854             } else if (*cp=='N') {
1855                 mime_f = TRUE; cp++;
1856             } else if (*cp=='S') {
1857                 mime_f = STRICT_MIME; cp++;
1858             } else if (*cp=='0') {
1859                 mime_decode_f = FALSE;
1860                 mime_f = FALSE; cp++;
1861             }
1862             continue;
1863         case 'M':   /* MIME output */
1864             if (*cp=='B') {
1865                 mimeout_mode = 'B';
1866                 mimeout_f = FIXED_MIME; cp++;
1867             } else if (*cp=='Q') {
1868                 mimeout_mode = 'Q';
1869                 mimeout_f = FIXED_MIME; cp++;
1870             } else {
1871                 mimeout_f = TRUE;
1872             }
1873             continue;
1874         case 'B':   /* Broken JIS support */
1875             /*  bit:0   no ESC JIS
1876                 bit:1   allow any x on ESC-(-x or ESC-$-x
1877                 bit:2   reset to ascii on NL
1878             */
1879             if ('9'>= *cp && *cp>='0') 
1880                 broken_f |= 1<<(*cp++ -'0');
1881             else 
1882                 broken_f |= TRUE;
1883             continue;
1884 #ifndef PERL_XS
1885         case 'O':/* for Output file */
1886             file_out_f = TRUE;
1887             continue;
1888 #endif
1889         case 'c':/* add cr code */
1890             crmode_f = CRLF;
1891             continue;
1892         case 'd':/* delete cr code */
1893             crmode_f = NL;
1894             continue;
1895         case 'I':   /* ISO-2022-JP output */
1896             iso2022jp_f = TRUE;
1897             continue;
1898         case 'L':  /* line mode */
1899             if (*cp=='u') {         /* unix */
1900                 crmode_f = NL; cp++;
1901             } else if (*cp=='m') { /* mac */
1902                 crmode_f = CR; cp++;
1903             } else if (*cp=='w') { /* windows */
1904                 crmode_f = CRLF; cp++;
1905             } else if (*cp=='0') { /* no conversion  */
1906                 crmode_f = 0; cp++;
1907             }
1908             continue;
1909         case 'g':
1910 #ifndef PERL_XS
1911             guess_f = TRUE;
1912 #endif
1913             continue;
1914         case ' ':    
1915         /* module muliple options in a string are allowed for Perl moudle  */
1916             while(*cp && *cp++!='-');
1917             continue;
1918         default:
1919             /* bogus option but ignored */
1920             continue;
1921         }
1922     }
1923 }
1924
1925 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1926 {
1927     if (iconv_func){
1928         struct input_code *p = input_code_list;
1929         while (p->name){
1930             if (iconv_func == p->iconv_func){
1931                 return p;
1932             }
1933             p++;
1934         }
1935     }
1936     return 0;
1937 }
1938
1939 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1940 {
1941 #ifdef INPUT_CODE_FIX
1942     if (f || !input_f)
1943 #endif
1944         if (estab_f != f){
1945             estab_f = f;
1946         }
1947
1948     if (iconv_func
1949 #ifdef INPUT_CODE_FIX
1950         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1951 #endif
1952         ){
1953         iconv = iconv_func;
1954     }
1955 #ifdef CHECK_OPTION
1956     if (estab_f && iconv_for_check != iconv){
1957         struct input_code *p = find_inputcode_byfunc(iconv);
1958         if (p){
1959             set_input_codename(p->name);
1960             debug(input_codename);
1961         }
1962         iconv_for_check = iconv;
1963     }
1964 #endif
1965 }
1966
/* Penalty bits accumulated in input_code.score; each flag is a distinct bit. */
#define SCORE_L2       (1)                   /* JIS level 2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* reinterpretation through CP932 */
#define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* characters that do not exist */
#else
#define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* characters that do not exist */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1) /* error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)
1980
1981 const nkf_char score_table_A0[] = {
1982     0, 0, 0, 0,
1983     0, 0, 0, 0,
1984     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1985     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1986 };
1987
1988 const nkf_char score_table_F0[] = {
1989     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1990     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1991     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1992     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1993 };
1994
1995 void set_code_score(struct input_code *ptr, nkf_char score)
1996 {
1997     if (ptr){
1998         ptr->score |= score;
1999     }
2000 }
2001
2002 void clr_code_score(struct input_code *ptr, nkf_char score)
2003 {
2004     if (ptr){
2005         ptr->score &= ~score;
2006     }
2007 }
2008
2009 void code_score(struct input_code *ptr)
2010 {
2011     nkf_char c2 = ptr->buf[0];
2012 #ifdef UTF8_OUTPUT_ENABLE
2013     nkf_char c1 = ptr->buf[1];
2014 #endif
2015     if (c2 < 0){
2016         set_code_score(ptr, SCORE_ERROR);
2017     }else if (c2 == SSO){
2018         set_code_score(ptr, SCORE_KANA);
2019 #ifdef UTF8_OUTPUT_ENABLE
2020     }else if (!e2w_conv(c2, c1)){
2021         set_code_score(ptr, SCORE_NO_EXIST);
2022 #endif
2023     }else if ((c2 & 0x70) == 0x20){
2024         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2025     }else if ((c2 & 0x70) == 0x70){
2026         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2027     }else if ((c2 & 0x70) >= 0x50){
2028         set_code_score(ptr, SCORE_L2);
2029     }
2030 }
2031
2032 void status_disable(struct input_code *ptr)
2033 {
2034     ptr->stat = -1;
2035     ptr->buf[0] = -1;
2036     code_score(ptr);
2037     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2038 }
2039
2040 void status_push_ch(struct input_code *ptr, nkf_char c)
2041 {
2042     ptr->buf[ptr->index++] = c;
2043 }
2044
2045 void status_clear(struct input_code *ptr)
2046 {
2047     ptr->stat = 0;
2048     ptr->index = 0;
2049 }
2050
2051 void status_reset(struct input_code *ptr)
2052 {
2053     status_clear(ptr);
2054     ptr->score = SCORE_INIT;
2055 }
2056
2057 void status_reinit(struct input_code *ptr)
2058 {
2059     status_reset(ptr);
2060     ptr->_file_stat = 0;
2061 }
2062
2063 void status_check(struct input_code *ptr, nkf_char c)
2064 {
2065     if (c <= DEL && estab_f){
2066         status_reset(ptr);
2067     }
2068 }
2069
2070 void s_status(struct input_code *ptr, nkf_char c)
2071 {
2072     switch(ptr->stat){
2073       case -1:
2074           status_check(ptr, c);
2075           break;
2076       case 0:
2077           if (c <= DEL){
2078               break;
2079 #ifdef NUMCHAR_OPTION
2080           }else if (is_unicode_capsule(c)){
2081               break;
2082 #endif
2083           }else if (0xa1 <= c && c <= 0xdf){
2084               status_push_ch(ptr, SSO);
2085               status_push_ch(ptr, c);
2086               code_score(ptr);
2087               status_clear(ptr);
2088           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2089               ptr->stat = 1;
2090               status_push_ch(ptr, c);
2091 #ifdef SHIFTJIS_CP932
2092           }else if (cp51932_f
2093                     && is_ibmext_in_sjis(c)){
2094               ptr->stat = 2;
2095               status_push_ch(ptr, c);
2096 #endif /* SHIFTJIS_CP932 */
2097 #ifdef X0212_ENABLE
2098           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2099               ptr->stat = 1;
2100               status_push_ch(ptr, c);
2101 #endif /* X0212_ENABLE */
2102           }else{
2103               status_disable(ptr);
2104           }
2105           break;
2106       case 1:
2107           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2108               status_push_ch(ptr, c);
2109               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2110               code_score(ptr);
2111               status_clear(ptr);
2112           }else{
2113               status_disable(ptr);
2114           }
2115           break;
2116       case 2:
2117 #ifdef SHIFTJIS_CP932
2118           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2119               status_push_ch(ptr, c);
2120               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2121                   set_code_score(ptr, SCORE_CP932);
2122                   status_clear(ptr);
2123                   break;
2124               }
2125           }
2126 #endif /* SHIFTJIS_CP932 */
2127 #ifndef X0212_ENABLE
2128           status_disable(ptr);
2129 #endif
2130           break;
2131     }
2132 }
2133
2134 void e_status(struct input_code *ptr, nkf_char c)
2135 {
2136     switch (ptr->stat){
2137       case -1:
2138           status_check(ptr, c);
2139           break;
2140       case 0:
2141           if (c <= DEL){
2142               break;
2143 #ifdef NUMCHAR_OPTION
2144           }else if (is_unicode_capsule(c)){
2145               break;
2146 #endif
2147           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2148               ptr->stat = 1;
2149               status_push_ch(ptr, c);
2150 #ifdef X0212_ENABLE
2151           }else if (0x8f == c){
2152               ptr->stat = 2;
2153               status_push_ch(ptr, c);
2154 #endif /* X0212_ENABLE */
2155           }else{
2156               status_disable(ptr);
2157           }
2158           break;
2159       case 1:
2160           if (0xa1 <= c && c <= 0xfe){
2161               status_push_ch(ptr, c);
2162               code_score(ptr);
2163               status_clear(ptr);
2164           }else{
2165               status_disable(ptr);
2166           }
2167           break;
2168 #ifdef X0212_ENABLE
2169       case 2:
2170           if (0xa1 <= c && c <= 0xfe){
2171               ptr->stat = 1;
2172               status_push_ch(ptr, c);
2173           }else{
2174               status_disable(ptr);
2175           }
2176 #endif /* X0212_ENABLE */
2177     }
2178 }
2179
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: advance the state machine in ptr by one input byte c.
 *
 * ptr->stat values:
 *   -1       disabled; only status_check() is consulted
 *    0       idle, waiting for a lead byte
 *    1, 2, 3 number of continuation bytes (0x80..0xbf) expected after the
 *            lead byte
 *
 * Two- and three-byte sequences are converted with w2e_conv() and scored,
 * except that the sequence EF BB BF (byte-order mark) is not scored.
 * Four-byte sequences are only consumed.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL){
              break;
          }
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c)){
              break;
          }
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index <= ptr->stat){
              /* more continuation bytes still to come */
              break;
          }
          {
              /* must be tested before w2e_conv() overwrites buf[0]/buf[1] */
              int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                            && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!is_bom){
                  code_score(ptr);
              }
              status_clear(ptr);
          }
          break;

      case 3:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              status_clear(ptr);
          }
          break;
    }
}
#endif
2239
2240 void code_status(nkf_char c)
2241 {
2242     int action_flag = 1;
2243     struct input_code *result = 0;
2244     struct input_code *p = input_code_list;
2245     while (p->name){
2246         if (!p->status_func) {
2247             ++p;
2248             continue;
2249         }
2250         if (!p->status_func)
2251             continue;
2252         (p->status_func)(p, c);
2253         if (p->stat > 0){
2254             action_flag = 0;
2255         }else if(p->stat == 0){
2256             if (result){
2257                 action_flag = 0;
2258             }else{
2259                 result = p;
2260             }
2261         }
2262         ++p;
2263     }
2264
2265     if (action_flag){
2266         if (result && !estab_f){
2267             set_iconv(TRUE, result->iconv_func);
2268         }else if (c <= DEL){
2269             struct input_code *ptr = input_code_list;
2270             while (ptr->name){
2271                 status_reset(ptr);
2272                 ++ptr;
2273             }
2274         }
2275     }
2276 }
2277
2278 #ifndef WIN32DLL
2279 nkf_char std_getc(FILE *f)
2280 {
2281     if (std_gc_ndx){
2282         return std_gc_buf[--std_gc_ndx];
2283     }
2284     return getc(f);
2285 }
2286 #endif /*WIN32DLL*/
2287
2288 nkf_char std_ungetc(nkf_char c, FILE *f)
2289 {
2290     if (std_gc_ndx == STD_GC_BUFSIZE){
2291         return EOF;
2292     }
2293     std_gc_buf[std_gc_ndx++] = c;
2294     return c;
2295 }
2296
2297 #ifndef WIN32DLL
2298 void std_putc(nkf_char c)
2299 {
2300     if(c!=EOF)
2301       putchar(c);
2302 }
2303 #endif /*WIN32DLL*/
2304
2305 #if !defined(PERL_XS) && !defined(WIN32DLL)
2306 nkf_char noconvert(FILE *f)
2307 {
2308     nkf_char    c;
2309
2310     if (nop_f == 2)
2311         module_connection();
2312     while ((c = (*i_getc)(f)) != EOF)
2313       (*o_putc)(c);
2314     (*o_putc)(EOF);
2315     return 1;
2316 }
2317 #endif
2318
2319 void module_connection(void)
2320 {
2321     oconv = output_conv; 
2322     o_putc = std_putc;
2323
2324     /* replace continucation module, from output side */
2325
2326     /* output redicrection */
2327 #ifdef CHECK_OPTION
2328     if (noout_f || guess_f){
2329         o_putc = no_putc;
2330     }
2331 #endif
2332     if (mimeout_f) {
2333         o_mputc = o_putc;
2334         o_putc = mime_putc;
2335         if (mimeout_f == TRUE) {
2336             o_base64conv = oconv; oconv = base64_conv;
2337         }
2338         /* base64_count = 0; */
2339     }
2340
2341     if (crmode_f) {
2342         o_crconv = oconv; oconv = cr_conv;
2343     }
2344     if (rot_f) {
2345         o_rot_conv = oconv; oconv = rot_conv;
2346     }
2347     if (iso2022jp_f) {
2348         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2349     }
2350     if (hira_f) {
2351         o_hira_conv = oconv; oconv = hira_conv;
2352     }
2353     if (fold_f) {
2354         o_fconv = oconv; oconv = fold_conv;
2355         f_line = 0;
2356     }
2357     if (alpha_f || x0201_f) {
2358         o_zconv = oconv; oconv = z_conv;
2359     }
2360
2361     i_getc = std_getc;
2362     i_ungetc = std_ungetc;
2363     /* input redicrection */
2364 #ifdef INPUT_OPTION
2365     if (cap_f){
2366         i_cgetc = i_getc; i_getc = cap_getc;
2367         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2368     }
2369     if (url_f){
2370         i_ugetc = i_getc; i_getc = url_getc;
2371         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2372     }
2373 #endif
2374 #ifdef NUMCHAR_OPTION
2375     if (numchar_f){
2376         i_ngetc = i_getc; i_getc = numchar_getc;
2377         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2378     }
2379 #endif
2380 #ifdef UNICODE_NORMALIZATION
2381     if (nfc_f && input_f == UTF8_INPUT){
2382         i_nfc_getc = i_getc; i_getc = nfc_getc;
2383         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2384     }
2385 #endif
2386     if (mime_f && mimebuf_f==FIXED_MIME) {
2387         i_mgetc = i_getc; i_getc = mime_getc;
2388         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2389     }
2390     if (broken_f & 1) {
2391         i_bgetc = i_getc; i_getc = broken_getc;
2392         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2393     }
2394     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2395         set_iconv(-TRUE, e_iconv);
2396     } else if (input_f == SJIS_INPUT) {
2397         set_iconv(-TRUE, s_iconv);
2398 #ifdef UTF8_INPUT_ENABLE
2399     } else if (input_f == UTF8_INPUT) {
2400         set_iconv(-TRUE, w_iconv);
2401     } else if (input_f == UTF16_INPUT) {
2402         set_iconv(-TRUE, w_iconv16);
2403     } else if (input_f == UTF32_INPUT) {
2404         set_iconv(-TRUE, w_iconv32);
2405 #endif
2406     } else {
2407         set_iconv(FALSE, e_iconv);
2408     }
2409
2410     {
2411         struct input_code *p = input_code_list;
2412         while (p->name){
2413             status_reinit(p++);
2414         }
2415     }
2416 }
2417
2418 /*
2419  * Check and Ignore BOM
2420  */
2421 void check_bom(FILE *f)
2422 {
2423     int c2;
2424     switch(c2 = (*i_getc)(f)){
2425     case 0x00:
2426         if((c2 = (*i_getc)(f)) == 0x00){
2427             if((c2 = (*i_getc)(f)) == 0xFE){
2428                 if((c2 = (*i_getc)(f)) == 0xFF){
2429                     if(!input_f){
2430                         set_iconv(TRUE, w_iconv32);
2431                     }
2432                     if (iconv == w_iconv32) {
2433                         input_endian = ENDIAN_BIG;
2434                         return;
2435                     }
2436                     (*i_ungetc)(0xFF,f);
2437                 }else (*i_ungetc)(c2,f);
2438                 (*i_ungetc)(0xFE,f);
2439             }else if(c2 == 0xFF){
2440                 if((c2 = (*i_getc)(f)) == 0xFE){
2441                     if(!input_f){
2442                         set_iconv(TRUE, w_iconv32);
2443                     }
2444                     if (iconv == w_iconv32) {
2445                         input_endian = ENDIAN_2143;
2446                         return;
2447                     }
2448                     (*i_ungetc)(0xFF,f);
2449                 }else (*i_ungetc)(c2,f);
2450                 (*i_ungetc)(0xFF,f);
2451             }else (*i_ungetc)(c2,f);
2452             (*i_ungetc)(0x00,f);
2453         }else (*i_ungetc)(c2,f);
2454         (*i_ungetc)(0x00,f);
2455         break;
2456     case 0xEF:
2457         if((c2 = (*i_getc)(f)) == 0xBB){
2458             if((c2 = (*i_getc)(f)) == 0xBF){
2459                 if(!input_f){
2460                     set_iconv(TRUE, w_iconv);
2461                 }
2462                 if (iconv == w_iconv) {
2463                     return;
2464                 }
2465                 (*i_ungetc)(0xBF,f);
2466             }else (*i_ungetc)(c2,f);
2467             (*i_ungetc)(0xBB,f);
2468         }else (*i_ungetc)(c2,f);
2469         (*i_ungetc)(0xEF,f);
2470         break;
2471     case 0xFE:
2472         if((c2 = (*i_getc)(f)) == 0xFF){
2473             if((c2 = (*i_getc)(f)) == 0x00){
2474                 if((c2 = (*i_getc)(f)) == 0x00){
2475                     if(!input_f){
2476                         set_iconv(TRUE, w_iconv32);
2477                     }
2478                     if (iconv == w_iconv32) {
2479                         input_endian = ENDIAN_3412;
2480                         return;
2481                     }
2482                     (*i_ungetc)(0x00,f);
2483                 }else (*i_ungetc)(c2,f);
2484                 (*i_ungetc)(0x00,f);
2485             }else (*i_ungetc)(c2,f);
2486             if(!input_f){
2487                 set_iconv(TRUE, w_iconv16);
2488             }
2489             if (iconv == w_iconv16) {
2490                 input_endian = ENDIAN_BIG;
2491                 return;
2492             }
2493             (*i_ungetc)(0xFF,f);
2494         }else (*i_ungetc)(c2,f);
2495         (*i_ungetc)(0xFE,f);
2496         break;
2497     case 0xFF:
2498         if((c2 = (*i_getc)(f)) == 0xFE){
2499             if((c2 = (*i_getc)(f)) == 0x00){
2500                 if((c2 = (*i_getc)(f)) == 0x00){
2501                     if(!input_f){
2502                         set_iconv(TRUE, w_iconv32);
2503                     }
2504                     if (iconv == w_iconv32) {
2505                         input_endian = ENDIAN_LITTLE;
2506                         return;
2507                     }
2508                     (*i_ungetc)(0x00,f);
2509                 }else (*i_ungetc)(c2,f);
2510                 (*i_ungetc)(0x00,f);
2511             }else (*i_ungetc)(c2,f);
2512             if(!input_f){
2513                 set_iconv(TRUE, w_iconv16);
2514             }
2515             if (iconv == w_iconv16) {
2516                 input_endian = ENDIAN_LITTLE;
2517                 return;
2518             }
2519             (*i_ungetc)(0xFE,f);
2520         }else (*i_ungetc)(c2,f);
2521         (*i_ungetc)(0xFF,f);
2522         break;
2523     default:
2524         (*i_ungetc)(c2,f);
2525         break;
2526     }
2527 }
2528
2529 /*
2530    Conversion main loop. Code detection only. 
2531  */
2532
2533 nkf_char kanji_convert(FILE *f)
2534 {
2535     nkf_char    c3, c2=0, c1, c0=0;
2536     int is_8bit = FALSE;
2537
2538     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2539 #ifdef UTF8_INPUT_ENABLE
2540        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2541 #endif
2542       ){
2543         is_8bit = TRUE;
2544     }
2545
2546     input_mode = ASCII;
2547     output_mode = ASCII;
2548     shift_mode = FALSE;
2549
2550 #define NEXT continue      /* no output, get next */
2551 #define SEND ;             /* output c1 and c2, get next */
2552 #define LAST break         /* end of loop, go closing  */
2553
2554     module_connection();
2555     check_bom(f);
2556
2557     while ((c1 = (*i_getc)(f)) != EOF) {
2558 #ifdef INPUT_CODE_FIX
2559         if (!input_f)
2560 #endif
2561             code_status(c1);
2562         if (c2) {
2563             /* second byte */
2564             if (c2 > DEL) {
2565                 /* in case of 8th bit is on */
2566                 if (!estab_f&&!mime_decode_mode) {
2567                     /* in case of not established yet */
2568                     /* It is still ambiguious */
2569                     if (h_conv(f, c2, c1)==EOF) 
2570                         LAST;
2571                     else 
2572                         c2 = 0;
2573                     NEXT;
2574                 } else {
2575                     /* in case of already established */
2576                     if (c1 < AT && !(X0208 && 0x80 <= c2 && c2 <= 0x92)) {
2577                         /* ignore bogus code and not CP5022x UCD */
2578                         c2 = 0;
2579                         NEXT;
2580                     } else {
2581                         SEND;
2582                     }
2583                 }
2584             } else
2585                 /* second byte, 7 bit code */
2586                 /* it might be kanji shitfted */
2587                 if ((c1 == DEL) || (c1 <= SPACE)) {
2588                     /* ignore bogus first code */
2589                     c2 = 0;
2590                     NEXT;
2591                 } else
2592                     SEND;
2593         } else {
2594             /* first byte */
2595 #ifdef UTF8_INPUT_ENABLE
2596             if (iconv == w_iconv16) {
2597                 if (input_endian == ENDIAN_BIG) {
2598                     c2 = c1;
2599                     if ((c1 = (*i_getc)(f)) != EOF) {
2600                         if (0xD8 <= c2 && c2 <= 0xDB) {
2601                             if ((c0 = (*i_getc)(f)) != EOF) {
2602                                 c0 <<= 8;
2603                                 if ((c3 = (*i_getc)(f)) != EOF) {
2604                                     c0 |= c3;
2605                                 } else c2 = EOF;
2606                             } else c2 = EOF;
2607                         }
2608                     } else c2 = EOF;
2609                 } else {
2610                     if ((c2 = (*i_getc)(f)) != EOF) {
2611                         if (0xD8 <= c2 && c2 <= 0xDB) {
2612                             if ((c3 = (*i_getc)(f)) != EOF) {
2613                                 if ((c0 = (*i_getc)(f)) != EOF) {
2614                                     c0 <<= 8;
2615                                     c0 |= c3;
2616                                 } else c2 = EOF;
2617                             } else c2 = EOF;
2618                         }
2619                     } else c2 = EOF;
2620                 }
2621                 SEND;
2622             } else if(iconv == w_iconv32){
2623                 int c3 = c1;
2624                 if((c2 = (*i_getc)(f)) != EOF &&
2625                    (c1 = (*i_getc)(f)) != EOF &&
2626                    (c0 = (*i_getc)(f)) != EOF){
2627                     switch(input_endian){
2628                     case ENDIAN_BIG:
2629                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2630                         break;
2631                     case ENDIAN_LITTLE:
2632                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2633                         break;
2634                     case ENDIAN_2143:
2635                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2636                         break;
2637                     case ENDIAN_3412:
2638                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2639                         break;
2640                     }
2641                     c2 = 0;
2642                 }else{
2643                     c2 = EOF;
2644                 }
2645                 SEND;
2646             } else
2647 #endif
2648 #ifdef NUMCHAR_OPTION
2649             if (is_unicode_capsule(c1)){
2650                 SEND;
2651             } else
2652 #endif
2653             if (c1 > DEL) {
2654                 /* 8 bit code */
2655                 if (!estab_f && !iso8859_f) {
2656                     /* not established yet */
2657                     c2 = c1;
2658                     NEXT;
2659                 } else { /* estab_f==TRUE */
2660                     if (iso8859_f) {
2661                         c2 = ISO8859_1;
2662                         c1 &= 0x7f;
2663                         SEND;
2664                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2665                         /* SJIS X0201 Case... */
2666                         if(iso2022jp_f && x0201_f==NO_X0201) {
2667                             (*oconv)(GETA1, GETA2);
2668                             NEXT;
2669                         } else {
2670                             c2 = X0201;
2671                             c1 &= 0x7f;
2672                             SEND;
2673                         }
2674                     } else if (c1==SSO && iconv != s_iconv) {
2675                         /* EUC X0201 Case */
2676                         c1 = (*i_getc)(f);  /* skip SSO */
2677                         code_status(c1);
2678                         if (SSP<=c1 && c1<0xe0) {
2679                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2680                                 (*oconv)(GETA1, GETA2);
2681                                 NEXT;
2682                             } else {
2683                                 c2 = X0201;
2684                                 c1 &= 0x7f;
2685                                 SEND;
2686                             }
2687                         } else  { /* bogus code, skip SSO and one byte */
2688                             NEXT;
2689                         }
2690                     } else {
2691                        /* already established */
2692                        c2 = c1;
2693                        NEXT;
2694                     }
2695                 }
2696             } else if ((c1 > SPACE) && (c1 != DEL)) {
2697                 /* in case of Roman characters */
2698                 if (shift_mode) { 
2699                     /* output 1 shifted byte */
2700                     if (iso8859_f) {
2701                         c2 = ISO8859_1;
2702                         SEND;
2703                     } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2704                       /* output 1 shifted byte */
2705                         if(iso2022jp_f && x0201_f==NO_X0201) {
2706                             (*oconv)(GETA1, GETA2);
2707                             NEXT;
2708                         } else {
2709                             c2 = X0201;
2710                             SEND;
2711                         }
2712                     } else {
2713                         /* look like bogus code */
2714                         NEXT;
2715                     }
2716                 } else if (input_mode == X0208 || input_mode == X0212 ||
2717                            input_mode == X0213_1 || input_mode == X0213_2) {
2718                     /* in case of Kanji shifted */
2719                     c2 = c1;
2720                     NEXT;
2721                 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2722                     /* Check MIME code */
2723                     if ((c1 = (*i_getc)(f)) == EOF) {
2724                         (*oconv)(0, '=');
2725                         LAST;
2726                     } else if (c1 == '?') {
2727                         /* =? is mime conversion start sequence */
2728                         if(mime_f == STRICT_MIME) {
2729                             /* check in real detail */
2730                             if (mime_begin_strict(f) == EOF) 
2731                                 LAST;
2732                             else
2733                                 NEXT;
2734                         } else if (mime_begin(f) == EOF) 
2735                             LAST;
2736                         else
2737                             NEXT;
2738                     } else {
2739                         (*oconv)(0, '=');
2740                         (*i_ungetc)(c1,f);
2741                         NEXT;
2742                     }
2743                 } else {
2744                     /* normal ASCII code */ 
2745                     SEND;
2746                 }
2747             } else if (!is_8bit && c1 == SI) {
2748                 shift_mode = FALSE; 
2749                 NEXT;
2750             } else if (!is_8bit && c1 == SO) {
2751                 shift_mode = TRUE; 
2752                 NEXT;
2753             } else if (!is_8bit && c1 == ESC ) {
2754                 if ((c1 = (*i_getc)(f)) == EOF) {
2755                     /*  (*oconv)(0, ESC); don't send bogus code */
2756                     LAST;
2757                 } else if (c1 == '$') {
2758                     if ((c1 = (*i_getc)(f)) == EOF) {
2759                         /*
2760                         (*oconv)(0, ESC); don't send bogus code 
2761                         (*oconv)(0, '$'); */
2762                         LAST;
2763                     } else if (c1 == '@'|| c1 == 'B') {
2764                         /* This is kanji introduction */
2765                         input_mode = X0208;
2766                         shift_mode = FALSE;
2767                         set_input_codename("ISO-2022-JP");
2768 #ifdef CHECK_OPTION
2769                         debug(input_codename);
2770 #endif
2771                         NEXT;
2772                     } else if (c1 == '(') {
2773                         if ((c1 = (*i_getc)(f)) == EOF) {
2774                             /* don't send bogus code 
2775                             (*oconv)(0, ESC);
2776                             (*oconv)(0, '$');
2777                             (*oconv)(0, '(');
2778                                 */
2779                             LAST;
2780                         } else if (c1 == '@'|| c1 == 'B') {
2781                             /* This is kanji introduction */
2782                             input_mode = X0208;
2783                             shift_mode = FALSE;
2784                             NEXT;
2785 #ifdef X0212_ENABLE
2786                         } else if (c1 == 'D'){
2787                             input_mode = X0212;
2788                             shift_mode = FALSE;
2789                             NEXT;
2790 #endif /* X0212_ENABLE */
2791                         } else if (c1 == (X0213_1&0x7F)){
2792                             input_mode = X0213_1;
2793                             shift_mode = FALSE;
2794                             NEXT;
2795                         } else if (c1 == (X0213_2&0x7F)){
2796                             input_mode = X0213_2;
2797                             shift_mode = FALSE;
2798                             NEXT;
2799                         } else {
2800                             /* could be some special code */
2801                             (*oconv)(0, ESC);
2802                             (*oconv)(0, '$');
2803                             (*oconv)(0, '(');
2804                             (*oconv)(0, c1);
2805                             NEXT;
2806                         }
2807                     } else if (broken_f&0x2) {
2808                         /* accept any ESC-(-x as broken code ... */
2809                         input_mode = X0208;
2810                         shift_mode = FALSE;
2811                         NEXT;
2812                     } else {
2813                         (*oconv)(0, ESC);
2814                         (*oconv)(0, '$');
2815                         (*oconv)(0, c1);
2816                         NEXT;
2817                     }
2818                 } else if (c1 == '(') {
2819                     if ((c1 = (*i_getc)(f)) == EOF) {
2820                         /* don't send bogus code 
2821                         (*oconv)(0, ESC);
2822                         (*oconv)(0, '('); */
2823                         LAST;
2824                     } else {
2825                         if (c1 == 'I') {
2826                             /* This is X0201 kana introduction */
2827                             input_mode = X0201; shift_mode = X0201;
2828                             NEXT;
2829                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2830                             /* This is X0208 kanji introduction */
2831                             input_mode = ASCII; shift_mode = FALSE;
2832                             NEXT;
2833                         } else if (broken_f&0x2) {
2834                             input_mode = ASCII; shift_mode = FALSE;
2835                             NEXT;
2836                         } else {
2837                             (*oconv)(0, ESC);
2838                             (*oconv)(0, '(');
2839                             /* maintain various input_mode here */
2840                             SEND;
2841                         }
2842                     }
2843                } else if ( c1 == 'N' || c1 == 'n' ){
2844                    /* SS2 */
2845                    c3 = (*i_getc)(f);  /* skip SS2 */
2846                    if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2847                        c1 = c3;
2848                        c2 = X0201;
2849                        SEND;
2850                    }else{
2851                        (*i_ungetc)(c3, f);
2852                        /* lonely ESC  */
2853                        (*oconv)(0, ESC);
2854                        SEND;
2855                    }
2856                 } else {
2857                     /* lonely ESC  */
2858                     (*oconv)(0, ESC);
2859                     SEND;
2860                 }
2861             } else if (c1 == ESC && iconv == s_iconv) {
2862                 /* ESC in Shift_JIS */
2863                 if ((c1 = (*i_getc)(f)) == EOF) {
2864                     /*  (*oconv)(0, ESC); don't send bogus code */
2865                     LAST;
2866                 } else if (c1 == '$') {
2867                     /* J-PHONE emoji */
2868                     if ((c1 = (*i_getc)(f)) == EOF) {
2869                         /*
2870                            (*oconv)(0, ESC); don't send bogus code 
2871                            (*oconv)(0, '$'); */
2872                         LAST;
2873                     } else {
2874                         if (('E' <= c1 && c1 <= 'G') ||
2875                             ('O' <= c1 && c1 <= 'Q')) {
2876                             /*
2877                                NUM : 0 1 2 3 4 5
2878                                BYTE: G E F O P Q
2879                                C%7 : 1 6 0 2 3 4
2880                                C%7 : 0 1 2 3 4 5 6
2881                                NUM : 2 0 3 4 5 X 1
2882                              */
2883                             static const int jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2884                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SPACE + 0xE000 + CLASS_UNICODE;
2885                             while ((c1 = (*i_getc)(f)) != EOF) {
2886                                 if (SPACE <= c1 && c1 <= 'z') {
2887                                     (*oconv)(0, c1 + c0);
2888                                 } else break; /* c1 == SO */
2889                             }
2890                         }
2891                     }
2892                     if (c1 == EOF) LAST;
2893                     NEXT;
2894                 } else {
2895                     /* lonely ESC  */
2896                     (*oconv)(0, ESC);
2897                     SEND;
2898                 }
2899             } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2900                 input_mode = ASCII; set_iconv(FALSE, 0);
2901                 SEND;
2902             } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2903                 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2904                     i_ungetc(SPACE,f);
2905                     continue;
2906                 } else {
2907                     i_ungetc(c1,f);
2908                 }
2909                 c1 = NL;
2910                 SEND;
2911             } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2912                 if ((c1=(*i_getc)(f))!=EOF) {
2913                     if (c1==SPACE) {
2914                         i_ungetc(SPACE,f);
2915                         continue;
2916                     } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2917                         i_ungetc(SPACE,f);
2918                         continue;
2919                     } else {
2920                         i_ungetc(c1,f);
2921                     }
2922                     i_ungetc(NL,f);
2923                 } else {
2924                     i_ungetc(c1,f);
2925                 }
2926                 c1 = CR;
2927                 SEND;
2928             } else if (c1 == DEL && input_mode == X0208 ) {
2929                 /* CP5022x */
2930                 c2 = c1;
2931                 NEXT;
2932             } else 
2933                 SEND;
2934         }
2935         /* send: */
2936         switch(input_mode){
2937         case ASCII:
2938             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2939             case -2:
2940                 /* 4 bytes UTF-8 */
2941                 if ((c0 = (*i_getc)(f)) != EOF) {
2942                     code_status(c0);
2943                     c0 <<= 8;
2944                     if ((c3 = (*i_getc)(f)) != EOF) {
2945                         code_status(c3);
2946                         (*iconv)(c2, c1, c0|c3);
2947                     }
2948                 }
2949                 break;
2950             case -1:
2951                 /* 3 bytes EUC or UTF-8 */
2952                 if ((c0 = (*i_getc)(f)) != EOF) {
2953                     code_status(c0);
2954                     (*iconv)(c2, c1, c0);
2955                 }
2956                 break;
2957             }
2958             break;
2959         case X0208:
2960         case X0213_1:
2961             if (ms_ucs_map_f == UCS_MAP_CP932 &&
2962                 0x7F <= c2 && c2 <= 0x92 &&
2963                 0x21 <= c1 && c1 <= 0x7E) {
2964                 /* CP932 UDC */
2965                 if(c1 == 0x7F) return 0;
2966                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2967                 c2 = 0;
2968             }
2969             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2970             break;
2971 #ifdef X0212_ENABLE
2972         case X0212:
2973             (*oconv)(PREFIX_EUCG3 | c2, c1);
2974             break;
2975 #endif /* X0212_ENABLE */
2976         case X0213_2:
2977             (*oconv)(PREFIX_EUCG3 | c2, c1);
2978             break;
2979         default:
2980             (*oconv)(input_mode, c1);  /* other special case */
2981         }
2982
2983         c2 = 0;
2984         c0 = 0;
2985         continue;
2986         /* goto next_word */
2987     }
2988
2989     /* epilogue */
2990     (*iconv)(EOF, 0, 0);
2991     if (!is_inputcode_set)
2992     {
2993         if (is_8bit) {
2994             struct input_code *p = input_code_list;
2995             struct input_code *result = p;
2996             while (p->name){
2997                 if (p->score < result->score) result = p;
2998                 ++p;
2999             }
3000             set_input_codename(result->name);
3001         }
3002     }
3003     return 1;
3004 }
3005
3006 nkf_char
3007 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3008 {
3009     nkf_char ret, c3, c0;
3010     int hold_index;
3011
3012
3013     /** it must NOT be in the kanji shifte sequence      */
3014     /** it must NOT be written in JIS7                   */
3015     /** and it must be after 2 byte 8bit code            */
3016
3017     hold_count = 0;
3018     push_hold_buf(c2);
3019     push_hold_buf(c1);
3020
3021     while ((c1 = (*i_getc)(f)) != EOF) {
3022         if (c1 == ESC){
3023             (*i_ungetc)(c1,f);
3024             break;
3025         }
3026         code_status(c1);
3027         if (push_hold_buf(c1) == EOF || estab_f){
3028             break;
3029         }
3030     }
3031
3032     if (!estab_f){
3033         struct input_code *p = input_code_list;
3034         struct input_code *result = p;
3035         if (c1 == EOF){
3036             code_status(c1);
3037         }
3038         while (p->name){
3039             if (p->score < result->score){
3040                 result = p;
3041             }
3042             ++p;
3043         }
3044         set_iconv(FALSE, result->iconv_func);
3045     }
3046
3047
3048     /** now,
3049      ** 1) EOF is detected, or
3050      ** 2) Code is established, or
3051      ** 3) Buffer is FULL (but last word is pushed)
3052      **
3053      ** in 1) and 3) cases, we continue to use
3054      ** Kanji codes by oconv and leave estab_f unchanged.
3055      **/
3056
3057     ret = c1;
3058     hold_index = 0;
3059     while (hold_index < hold_count){
3060         c2 = hold_buf[hold_index++];
3061         if (c2 <= DEL
3062 #ifdef NUMCHAR_OPTION
3063             || is_unicode_capsule(c2)
3064 #endif
3065             ){
3066             (*iconv)(0, c2, 0);
3067             continue;
3068         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3069             (*iconv)(X0201, c2, 0);
3070             continue;
3071         }
3072         if (hold_index < hold_count){
3073             c1 = hold_buf[hold_index++];
3074         }else{
3075             c1 = (*i_getc)(f);
3076             if (c1 == EOF){
3077                 c3 = EOF;
3078                 break;
3079             }
3080             code_status(c1);
3081         }
3082         c0 = 0;
3083         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3084         case -2:
3085             /* 4 bytes UTF-8 */
3086             if (hold_index < hold_count){
3087                 c0 = hold_buf[hold_index++];
3088             } else if ((c0 = (*i_getc)(f)) == EOF) {
3089                 ret = EOF;
3090                 break;
3091             } else {
3092                 code_status(c0);
3093                 c0 <<= 8;
3094                 if (hold_index < hold_count){
3095                     c3 = hold_buf[hold_index++];
3096                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3097                     c0 = ret = EOF;
3098                     break;
3099                 } else {
3100                     code_status(c3);
3101                     (*iconv)(c2, c1, c0|c3);
3102                 }
3103             }
3104             break;
3105         case -1:
3106             /* 3 bytes EUC or UTF-8 */
3107             if (hold_index < hold_count){
3108                 c0 = hold_buf[hold_index++];
3109             } else if ((c0 = (*i_getc)(f)) == EOF) {
3110                 ret = EOF;
3111                 break;
3112             } else {
3113                 code_status(c0);
3114             }
3115             (*iconv)(c2, c1, c0);
3116             break;
3117         }
3118         if (c0 == EOF) break;
3119     }
3120     return ret;
3121 }
3122
3123 nkf_char push_hold_buf(nkf_char c2)
3124 {
3125     if (hold_count >= HOLD_SIZE*2)
3126         return (EOF);
3127     hold_buf[hold_count++] = (unsigned char)c2;
3128     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3129 }
3130
3131 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3132 {
3133 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3134     nkf_char val;
3135 #endif
3136     static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3137 #ifdef SHIFTJIS_CP932
3138     if (cp51932_f && is_ibmext_in_sjis(c2)){
3139 #if 0
3140         extern const unsigned short shiftjis_cp932[3][189];
3141 #endif
3142         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3143         if (val){
3144             c2 = val >> 8;
3145             c1 = val & 0xff;
3146         }
3147     }
3148 #endif /* SHIFTJIS_CP932 */
3149 #ifdef X0212_ENABLE
3150     if (!x0213_f && is_ibmext_in_sjis(c2)){
3151 #if 0
3152         extern const unsigned short shiftjis_x0212[3][189];
3153 #endif
3154         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3155         if (val){
3156             if (val > 0x7FFF){
3157                 c2 = PREFIX_EUCG3 | (val >> 8);
3158                 c1 = val & 0xff;
3159             }else{
3160                 c2 = val >> 8;
3161                 c1 = val & 0xff;
3162             }
3163             if (p2) *p2 = c2;
3164             if (p1) *p1 = c1;
3165             return 0;
3166         }
3167     }
3168 #endif
3169     if(c2 >= 0x80){
3170         if(x0213_f && c2 >= 0xF0){
3171             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3172                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3173             }else{ /* 78<=k<=94 */
3174                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3175                 if (0x9E < c1) c2++;
3176             }
3177         }else{
3178             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3179             if (0x9E < c1) c2++;
3180         }
3181         if (c1 < 0x9F)
3182             c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
3183         else {
3184             c1 = c1 - 0x7E;
3185         }
3186     }
3187
3188 #ifdef X0212_ENABLE
3189     c2 = x0212_unshift(c2);
3190 #endif
3191     if (p2) *p2 = c2;
3192     if (p1) *p1 = c1;
3193     return 0;
3194 }
3195
3196 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3197 {
3198     if (c2 == X0201) {
3199         c1 &= 0x7f;
3200     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3201         /* NOP */
3202     } else if (ms_ucs_map_f == UCS_MAP_CP932 &&
3203                0xF0 <= c2 && c2 <= 0xF9 &&
3204                0x40 <= c1 && c1 <= 0xFC) {
3205         /* CP932 UDC */
3206         if(c1 == 0x7F) return 0;
3207         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3208         c2 = 0;
3209     } else {
3210         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3211         if (ret) return ret;
3212     }
3213     (*oconv)(c2, c1);
3214     return 0;
3215 }
3216
3217 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3218 {
3219     if (c2 == X0201) {
3220         c1 &= 0x7f;
3221 #ifdef X0212_ENABLE
3222     }else if (c2 == 0x8f){
3223         if (c0 == 0){
3224             return -1;
3225         }
3226         if (ms_ucs_map_f == UCS_MAP_MS && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3227             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3228             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3229             c2 = 0;
3230         } else {
3231             c2 = (c2 << 8) | (c1 & 0x7f);
3232             c1 = c0 & 0x7f;
3233 #ifdef SHIFTJIS_CP932
3234             if (cp51932_f){
3235                 nkf_char s2, s1;
3236                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3237                     s2e_conv(s2, s1, &c2, &c1);
3238                     if (c2 < 0x100){
3239                         c1 &= 0x7f;
3240                         c2 &= 0x7f;
3241                     }
3242                 }
3243             }
3244 #endif /* SHIFTJIS_CP932 */
3245         }
3246 #endif /* X0212_ENABLE */
3247     } else if (c2 == SSO){
3248         c2 = X0201;
3249         c1 &= 0x7f;
3250     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3251         /* NOP */
3252     } else {
3253         if (ms_ucs_map_f == UCS_MAP_MS && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3254             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3255             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3256             c2 = 0;
3257         } else {
3258             c1 &= 0x7f;
3259             c2 &= 0x7f;
3260         }
3261     }
3262     (*oconv)(c2, c1);
3263     return 0;
3264 }
3265
3266 #ifdef UTF8_INPUT_ENABLE
3267 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3268 {
3269     nkf_char ret = 0;
3270
3271     if (!c1){
3272         *p2 = 0;
3273         *p1 = c2;
3274     }else if (0xc0 <= c2 && c2 <= 0xef) {
3275         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3276 #ifdef NUMCHAR_OPTION
3277         if (ret > 0){
3278             if (p2) *p2 = 0;
3279             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3280             ret = 0;
3281         }
3282 #endif
3283     }
3284     return ret;
3285 }
3286
3287 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3288 {
3289     nkf_char ret = 0;
3290     static const int w_iconv_utf8_1st_byte[] =
3291     { /* 0xC0 - 0xFF */
3292         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3293         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3294         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3295         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3296     
3297     if (c2 < 0 || 0xff < c2) {
3298     }else if (c2 == 0) { /* 0 : 1 byte*/
3299         c0 = 0;
3300     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3301         return 0;
3302     } else{
3303         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3304         case 21:
3305             if (c1 < 0x80 || 0xBF < c1) return 0;
3306             break;
3307         case 30:
3308             if (c0 == 0) return -1;
3309             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3310                 return 0;
3311             break;
3312         case 31:
3313         case 33:
3314             if (c0 == 0) return -1;
3315             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3316                 return 0;
3317             break;
3318         case 32:
3319             if (c0 == 0) return -1;
3320             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3321                 return 0;
3322             break;
3323         case 40:
3324             if (c0 == 0) return -2;
3325             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3326                 return 0;
3327             break;
3328         case 41:
3329             if (c0 == 0) return -2;
3330             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3331                 return 0;
3332             break;
3333         case 42:
3334             if (c0 == 0) return -2;
3335             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3336                 return 0;
3337             break;
3338         default:
3339             return 0;
3340             break;
3341         }
3342     }
3343     if (c2 == 0 || c2 == EOF){
3344     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3345         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3346         c2 = 0;
3347     } else {
3348         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3349     }
3350     if (ret == 0){
3351         (*oconv)(c2, c1);
3352     }
3353     return ret;
3354 }
3355 #endif
3356
3357 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3358 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3359 {
3360     val &= VALUE_MASK;
3361     if (val < 0x80){
3362         *p2 = val;
3363         *p1 = 0;
3364         *p0 = 0;
3365     }else if (val < 0x800){
3366         *p2 = 0xc0 | (val >> 6);
3367         *p1 = 0x80 | (val & 0x3f);
3368         *p0 = 0;
3369     } else if (val <= NKF_INT32_C(0xFFFF)) {
3370         *p2 = 0xe0 | (val >> 12);
3371         *p1 = 0x80 | ((val >> 6) & 0x3f);
3372         *p0 = 0x80 | (val        & 0x3f);
3373     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3374         *p2 = 0xe0 |  (val >> 16);
3375         *p1 = 0x80 | ((val >> 12) & 0x3f);
3376         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3377     } else {
3378         *p2 = 0;
3379         *p1 = 0;
3380         *p0 = 0;
3381     }
3382 }
3383 #endif
3384
3385 #ifdef UTF8_INPUT_ENABLE
3386 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3387 {
3388     nkf_char val;
3389     if (c2 >= 0xf8) {
3390         val = -1;
3391     } else if (c2 >= 0xf0){
3392         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3393         val = (c2 & 0x0f) << 18;
3394         val |= (c1 & 0x3f) << 12;
3395         val |= (c0 & 0x3f00) >> 2;
3396         val |= (c0 & 0x3f);
3397     }else if (c2 >= 0xe0){
3398         val = (c2 & 0x0f) << 12;
3399         val |= (c1 & 0x3f) << 6;
3400         val |= (c0 & 0x3f);
3401     }else if (c2 >= 0xc0){
3402         val = (c2 & 0x1f) << 6;
3403         val |= (c1 & 0x3f);
3404     }else{
3405         val = c2;
3406     }
3407     return val;
3408 }
3409
3410 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3411 {
3412     nkf_char c2, c1, c0;
3413     nkf_char ret = 0;
3414     val &= VALUE_MASK;
3415     if (val < 0x80){
3416         *p2 = 0;
3417         *p1 = val;
3418     }else{
3419         w16w_conv(val, &c2, &c1, &c0);
3420         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3421 #ifdef NUMCHAR_OPTION
3422         if (ret > 0){
3423             *p2 = 0;
3424             *p1 = CLASS_UNICODE | val;
3425             ret = 0;
3426         }
3427 #endif
3428     }
3429     return ret;
3430 }
3431 #endif
3432
3433 #ifdef UTF8_INPUT_ENABLE
3434 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3435 {
3436     nkf_char ret = 0;
3437     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3438         (*oconv)(c2, c1);
3439         return 0;
3440     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3441         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3442             return -2;
3443         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3444         c2 = 0;
3445     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3446         /*
3447            return 2;
3448         */
3449         return 1;
3450     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3451     if (ret) return ret;
3452     (*oconv)(c2, c1);
3453     return 0;
3454 }
3455
3456 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3457 {
3458     int ret = 0;
3459
3460     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3461     } else if (is_unicode_bmp(c1)) {
3462         ret = w16e_conv(c1, &c2, &c1);
3463     } else {
3464         c2 = 0;
3465         c1 =  CLASS_UNICODE | c1;
3466     }
3467     if (ret) return ret;
3468     (*oconv)(c2, c1);
3469     return 0;
3470 }
3471
3472 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3473 {
3474 #if 0
3475     extern const unsigned short *const utf8_to_euc_2bytes[];
3476     extern const unsigned short *const utf8_to_euc_2bytes_ms[];
3477     extern const unsigned short *const utf8_to_euc_2bytes_932[];
3478     extern const unsigned short *const *const utf8_to_euc_3bytes[];
3479     extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
3480     extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
3481 #endif
3482     const unsigned short *const *pp;
3483     const unsigned short *const *const *ppp;
3484     static const int no_best_fit_chars_table_C2[] =
3485     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3486         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3487         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3488         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3489     static const int no_best_fit_chars_table_C2_ms[] =
3490     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3491         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3492         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3493         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3494     static const int no_best_fit_chars_table_932_C2[] =
3495     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3496         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3497         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3498         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3499     static const int no_best_fit_chars_table_932_C3[] =
3500     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3501         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3502         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3503         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3504     nkf_char ret = 0;
3505
3506     if(c2 < 0x80){
3507         *p2 = 0;
3508         *p1 = c2;
3509     }else if(c2 < 0xe0){
3510         if(no_best_fit_chars_f){
3511             if(ms_ucs_map_f == UCS_MAP_CP932){
3512                 switch(c2){
3513                 case 0xC2:
3514                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3515                     break;
3516                 case 0xC3:
3517                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3518                     break;
3519                 }
3520             }else if(cp51932_f){
3521                 switch(c2){
3522                 case 0xC2:
3523                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3524                     break;
3525                 case 0xC3:
3526                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3527                     break;
3528                 }
3529             }else if(ms_ucs_map_f == UCS_MAP_MS){
3530                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3531             }
3532         }
3533         pp =
3534             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3535             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3536             utf8_to_euc_2bytes;
3537         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3538     }else if(c0 < 0xF0){
3539         if(no_best_fit_chars_f){
3540             if(ms_ucs_map_f == UCS_MAP_CP932){
3541                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3542             }else if(ms_ucs_map_f == UCS_MAP_MS){
3543                 switch(c2){
3544                 case 0xE2:
3545                     switch(c1){
3546                     case 0x80:
3547                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3548                         break;
3549                     case 0x88:
3550                         if(c0 == 0x92) return 1;
3551                         break;
3552                     }
3553                     break;
3554                 case 0xE3:
3555                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3556                     break;
3557                 }
3558             }else{
3559                 switch(c2){
3560                 case 0xE2:
3561                     switch(c1){
3562                     case 0x80:
3563                         if(c0 == 0x95) return 1;
3564                         break;
3565                     case 0x88:
3566                         if(c0 == 0xA5) return 1;
3567                         break;
3568                     }
3569                     break;
3570                 case 0xEF:
3571                     switch(c1){
3572                     case 0xBC:
3573                         if(c0 == 0x8D) return 1;
3574                         break;
3575                     case 0xBD:
3576                         if(c0 == 0x9E && cp51932_f) return 1;
3577                         break;
3578                     case 0xBF:
3579                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3580                         break;
3581                     }
3582                     break;
3583                 }
3584             }
3585         }
3586         ppp =
3587             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3588             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3589             utf8_to_euc_3bytes;
3590         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3591     }else return -1;
3592 #ifdef SHIFTJIS_CP932
3593     if (!ret && cp51932_f && is_eucg3(*p2)) {
3594         nkf_char s2, s1;
3595         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3596             s2e_conv(s2, s1, p2, p1);
3597         }else{
3598             ret = 1;
3599         }
3600     }
3601 #endif
3602     return ret;
3603 }
3604
3605 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3606 {
3607     nkf_char c2;
3608     const unsigned short *p;
3609     unsigned short val;
3610
3611     if (pp == 0) return 1;
3612
3613     c1 -= 0x80;
3614     if (c1 < 0 || psize <= c1) return 1;
3615     p = pp[c1];
3616     if (p == 0)  return 1;
3617
3618     c0 -= 0x80;
3619     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3620     val = p[c0];
3621     if (val == 0) return 1;
3622     if (no_cp932ext_f && (
3623         (val>>8) == 0x2D || /* NEC special characters */
3624         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3625         )) return 1;
3626
3627     c2 = val >> 8;
3628    if (val > 0x7FFF){
3629         c2 &= 0x7f;
3630         c2 |= PREFIX_EUCG3;
3631     }
3632     if (c2 == SO) c2 = X0201;
3633     c1 = val & 0x7f;
3634     if (p2) *p2 = c2;
3635     if (p1) *p1 = c1;
3636     return 0;
3637 }
3638
3639 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3640 {
3641     const char *hex = "0123456789ABCDEF";
3642     int shift = 20;
3643     c &= VALUE_MASK;
3644     while(shift >= 0){
3645         if(c >= 1<<shift){
3646             while(shift >= 0){
3647                 (*f)(0, hex[(c>>shift)&0xF]);
3648                 shift -= 4;
3649             }
3650         }else{
3651             shift -= 4;
3652         }
3653     }
3654     return;
3655 }
3656
3657 void encode_fallback_html(nkf_char c)
3658 {
3659     (*oconv)(0, '&');
3660     (*oconv)(0, '#');
3661     c &= VALUE_MASK;
3662     if(c >= NKF_INT32_C(1000000))
3663         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3664     if(c >= NKF_INT32_C(100000))
3665         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3666     if(c >= 10000)
3667         (*oconv)(0, 0x30+(c/10000  )%10);
3668     if(c >= 1000)
3669         (*oconv)(0, 0x30+(c/1000   )%10);
3670     if(c >= 100)
3671         (*oconv)(0, 0x30+(c/100    )%10);
3672     if(c >= 10)
3673         (*oconv)(0, 0x30+(c/10     )%10);
3674     if(c >= 0)
3675         (*oconv)(0, 0x30+ c         %10);
3676     (*oconv)(0, ';');
3677     return;
3678 }
3679
3680 void encode_fallback_xml(nkf_char c)
3681 {
3682     (*oconv)(0, '&');
3683     (*oconv)(0, '#');
3684     (*oconv)(0, 'x');
3685     nkf_each_char_to_hex(oconv, c);
3686     (*oconv)(0, ';');
3687     return;
3688 }
3689
3690 void encode_fallback_java(nkf_char c)
3691 {
3692     const char *hex = "0123456789ABCDEF";
3693     (*oconv)(0, '\\');
3694     c &= VALUE_MASK;
3695     if(!is_unicode_bmp(c)){
3696         (*oconv)(0, 'U');
3697         (*oconv)(0, '0');
3698         (*oconv)(0, '0');
3699         (*oconv)(0, hex[(c>>20)&0xF]);
3700         (*oconv)(0, hex[(c>>16)&0xF]);
3701     }else{
3702         (*oconv)(0, 'u');
3703     }
3704     (*oconv)(0, hex[(c>>12)&0xF]);
3705     (*oconv)(0, hex[(c>> 8)&0xF]);
3706     (*oconv)(0, hex[(c>> 4)&0xF]);
3707     (*oconv)(0, hex[ c     &0xF]);
3708     return;
3709 }
3710
3711 void encode_fallback_perl(nkf_char c)
3712 {
3713     (*oconv)(0, '\\');
3714     (*oconv)(0, 'x');
3715     (*oconv)(0, '{');
3716     nkf_each_char_to_hex(oconv, c);
3717     (*oconv)(0, '}');
3718     return;
3719 }
3720
3721 void encode_fallback_subchar(nkf_char c)
3722 {
3723     c = unicode_subchar;
3724     (*oconv)((c>>8)&0xFF, c&0xFF);
3725     return;
3726 }
3727 #endif
3728
3729 #ifdef UTF8_OUTPUT_ENABLE
3730 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3731 {
3732 #if 0
3733     extern const unsigned short euc_to_utf8_1byte[];
3734     extern const unsigned short *const euc_to_utf8_2bytes[];
3735     extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3736     extern const unsigned short *const x0212_to_utf8_2bytes[];
3737 #endif
3738     const unsigned short *p;
3739
3740     if (c2 == X0201) {
3741         p = euc_to_utf8_1byte;
3742 #ifdef X0212_ENABLE
3743     } else if (is_eucg3(c2)){
3744         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3745             return 0xA6;
3746         }
3747         c2 = (c2&0x7f) - 0x21;
3748         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3749             p = x0212_to_utf8_2bytes[c2];
3750         else
3751             return 0;
3752 #endif
3753     } else {
3754         c2 &= 0x7f;
3755         c2 = (c2&0x7f) - 0x21;
3756         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3757             p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3758         else
3759             return 0;
3760     }
3761     if (!p) return 0;
3762     c1 = (c1 & 0x7f) - 0x21;
3763     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3764         return p[c1];
3765     return 0;
3766 }
3767
3768 void w_oconv(nkf_char c2, nkf_char c1)
3769 {
3770     nkf_char c0;
3771     nkf_char val;
3772
3773     if (output_bom_f) {
3774         output_bom_f = FALSE;
3775         (*o_putc)('\357');
3776         (*o_putc)('\273');
3777         (*o_putc)('\277');
3778     }
3779
3780     if (c2 == EOF) {
3781         (*o_putc)(EOF);
3782         return;
3783     }
3784
3785 #ifdef NUMCHAR_OPTION
3786     if (c2 == 0 && is_unicode_capsule(c1)){
3787         val = c1 & VALUE_MASK;
3788         if (val < 0x80){
3789             (*o_putc)(val);
3790         }else if (val < 0x800){
3791             (*o_putc)(0xC0 | (val >> 6));
3792             (*o_putc)(0x80 | (val & 0x3f));
3793         } else if (val <= NKF_INT32_C(0xFFFF)) {
3794             (*o_putc)(0xE0 | (val >> 12));
3795             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3796             (*o_putc)(0x80 | (val        & 0x3f));
3797         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3798             (*o_putc)(0xF0 | ( val>>18));
3799             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3800             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3801             (*o_putc)(0x80 | ( val      & 0x3f));
3802         }
3803         return;
3804     }
3805 #endif
3806
3807     if (c2 == 0) { 
3808         output_mode = ASCII;
3809         (*o_putc)(c1);
3810     } else if (c2 == ISO8859_1) {
3811         output_mode = ISO8859_1;
3812         (*o_putc)(c1 | 0x080);
3813     } else {
3814         output_mode = UTF8;
3815         val = e2w_conv(c2, c1);
3816         if (val){
3817             w16w_conv(val, &c2, &c1, &c0);
3818             (*o_putc)(c2);
3819             if (c1){
3820                 (*o_putc)(c1);
3821                 if (c0) (*o_putc)(c0);
3822             }
3823         }
3824     }
3825 }
3826
/*
 * UTF-16 output converter: writes the character (c2, c1) through o_putc,
 * using the byte order selected by output_endian.
 *
 * A pending byte-order mark (output_bom_f) is written first.  c2 == EOF
 * forwards EOF.  ISO8859_1 input becomes a unit with high byte 0 and
 * bit 7 of c1 set.  With NUMCHAR_OPTION, a Unicode capsule is written
 * either as one 16-bit unit or, outside the BMP and up to UNICODE_MAX,
 * as a surrogate pair; larger values produce no output.  Any other
 * non-zero c2 is converted with e2w_conv(); characters without a table
 * entry produce no output.
 */
3827 void w_oconv16(nkf_char c2, nkf_char c1)
3828 {
    /* emit the byte-order mark once, in the selected byte order */
3829     if (output_bom_f) {
3830         output_bom_f = FALSE;
3831         if (output_endian == ENDIAN_LITTLE){
            /* the cast keeps '\377' from being passed as a negative value */
3832             (*o_putc)((unsigned char)'\377');
3833             (*o_putc)('\376');
3834         }else{
3835             (*o_putc)('\376');
3836             (*o_putc)((unsigned char)'\377');
3837         }
3838     }
3839
3840     if (c2 == EOF) {
3841         (*o_putc)(EOF);
3842         return;
3843     }
3844
3845     if (c2 == ISO8859_1) {
3846         c2 = 0;
3847         c1 |= 0x80;
3848 #ifdef NUMCHAR_OPTION
3849     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3850         if (is_unicode_bmp(c1)) {
            /* split the value into high byte (c2) and low byte (c1) */
3851             c2 = (c1 >> 8) & 0xff;
3852             c1 &= 0xff;
3853         } else {
3854             c1 &= VALUE_MASK;
3855             if (c1 <= UNICODE_MAX) {
                /* c2 and c1 now each hold a full 16-bit surrogate unit */
3856                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3857                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3858                 if (output_endian == ENDIAN_LITTLE){
3859                     (*o_putc)(c2 & 0xff);
3860                     (*o_putc)((c2 >> 8) & 0xff);
3861                     (*o_putc)(c1 & 0xff);
3862                     (*o_putc)((c1 >> 8) & 0xff);
3863                 }else{
3864                     (*o_putc)((c2 >> 8) & 0xff);
3865                     (*o_putc)(c2 & 0xff);
3866                     (*o_putc)((c1 >> 8) & 0xff);
3867                     (*o_putc)(c1 & 0xff);
3868                 }
3869             }
3870             return;
3871         }
3872 #endif
3873     } else if (c2) {
3874         nkf_char val = e2w_conv(c2, c1);
3875         c2 = (val >> 8) & 0xff;
3876         c1 = val & 0xff;
        /* e2w_conv() returned 0: nothing is written */
3877         if (!val) return;
3878     }
    /* write the single 16-bit unit: c2 is the high byte, c1 the low byte */
3879     if (output_endian == ENDIAN_LITTLE){
3880         (*o_putc)(c1);
3881         (*o_putc)(c2);
3882     }else{
3883         (*o_putc)(c2);
3884         (*o_putc)(c1);
3885     }
3886 }
3887
3888 void w_oconv32(nkf_char c2, nkf_char c1)
3889 {
3890     if (output_bom_f) {
3891         output_bom_f = FALSE;
3892         if (output_endian == ENDIAN_LITTLE){
3893             (*o_putc)((unsigned char)'\377');
3894             (*o_putc)('\376');
3895             (*o_putc)('\000');
3896             (*o_putc)('\000');
3897         }else{
3898             (*o_putc)('\000');
3899             (*o_putc)('\000');
3900             (*o_putc)('\376');
3901             (*o_putc)((unsigned char)'\377');
3902         }
3903     }
3904
3905     if (c2 == EOF) {
3906         (*o_putc)(EOF);
3907         return;
3908     }
3909
3910     if (c2 == ISO8859_1) {
3911         c1 |= 0x80;
3912 #ifdef NUMCHAR_OPTION
3913     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3914         c1 &= VALUE_MASK;
3915 #endif
3916     } else if (c2) {
3917         c1 = e2w_conv(c2, c1);
3918         if (!c1) return;
3919     }
3920     if (output_endian == ENDIAN_LITTLE){
3921         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3922         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3923         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3924         (*o_putc)('\000');
3925     }else{
3926         (*o_putc)('\000');
3927         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3928         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3929         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3930     }
3931 }
3932 #endif
3933
/*
 * EUC-JP output converter: writes the character (c2, c1) through o_putc.
 *
 * With NUMCHAR_OPTION, a Unicode capsule in c1 is first converted with
 * w16e_conv().  If it is still a capsule afterwards it is either mapped
 * to a user-defined-character cell (UCS_MAP_MS, U+E000..U+E757) or
 * handed to encode_fallback, when one is set, and dropped.
 *
 * c2 then selects the output form: EOF, ASCII, X0201 (SSO prefix),
 * ISO8859_1, G3 (0x8f prefix, written only when x0212_f is set) or a
 * plain two-byte pair with bit 7 set on both bytes.
 */
3934 void e_oconv(nkf_char c2, nkf_char c1)
3935 {
3936 #ifdef NUMCHAR_OPTION
3937     if (c2 == 0 && is_unicode_capsule(c1)){
3938         w16e_conv(c1, &c2, &c1);
        /* still a capsule: the table lookup did not produce a pair */
3939         if (c2 == 0 && is_unicode_capsule(c1)){
3940             c2 = c1 & VALUE_MASK;
3941             if (ms_ucs_map_f == UCS_MAP_MS &&
3942                 0xE000 <= c2 && c2 <= 0xE757) {
3943                 /* eucJP-ms UDC */
                /* offset from 0xE000, laid out in rows of 94 cells */
3944                 c1 &= 0xFFF;
3945                 c2 = c1 / 94;
                /* the first 10 rows start at 0x75, later rows at 0x8FF5 (G3) */
3946                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
3947                 c1 = 0x21 + c1 % 94;
3948             } else {
3949                 if (encode_fallback) (*encode_fallback)(c1);
3950                 return;
3951             }
3952         }
3953     }
3954 #endif
3955     if (c2 == EOF) {
3956         (*o_putc)(EOF);
3957         return;
3958     } else if (c2 == 0) { 
3959         output_mode = ASCII;
3960         (*o_putc)(c1);
3961     } else if (c2 == X0201) {
3962         output_mode = JAPANESE_EUC;
3963         (*o_putc)(SSO); (*o_putc)(c1|0x80);
3964     } else if (c2 == ISO8859_1) {
3965         output_mode = ISO8859_1;
3966         (*o_putc)(c1 | 0x080);
3967 #ifdef X0212_ENABLE
3968     } else if (is_eucg3(c2)){
3969         output_mode = JAPANESE_EUC;
3970 #ifdef SHIFTJIS_CP932
        /* CP51932: round-trip through Shift_JIS when e2s_conv() succeeds */
3971         if (cp51932_f){
3972             nkf_char s2, s1;
3973             if (e2s_conv(c2, c1, &s2, &s1) == 0){
3974                 s2e_conv(s2, s1, &c2, &c1);
3975             }
3976         }
3977 #endif
3978         if (c2 == 0) {
3979             output_mode = ASCII;
3980             (*o_putc)(c1);
3981         }else if (is_eucg3(c2)){
            /* three-byte form; nothing is written unless x0212_f is set */
3982             if (x0212_f){
3983                 (*o_putc)(0x8f);
3984                 (*o_putc)((c2 & 0x7f) | 0x080);
3985                 (*o_putc)(c1 | 0x080);
3986             }
3987         }else{
3988             (*o_putc)((c2 & 0x7f) | 0x080);
3989             (*o_putc)(c1 | 0x080);
3990         }
3991 #endif
3992     } else {
3993         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
3994             set_iconv(FALSE, 0);
3995             return; /* too late to rescue this char */
3996         }
3997         output_mode = JAPANESE_EUC;
3998         (*o_putc)(c2 | 0x080);
3999         (*o_putc)(c1 | 0x080);
4000     }
4001 }
4002
4003 #ifdef X0212_ENABLE
4004 nkf_char x0212_shift(nkf_char c)
4005 {
4006     nkf_char ret = c;
4007     c &= 0x7f;
4008     if (is_eucg3(ret)){
4009         if (0x75 <= c && c <= 0x7f){
4010             ret = c + (0x109 - 0x75);
4011         }
4012     }else{
4013         if (0x75 <= c && c <= 0x7f){
4014             ret = c + (0x113 - 0x75);
4015         }
4016     }
4017     return ret;
4018 }
4019
4020
4021 nkf_char x0212_unshift(nkf_char c)
4022 {
4023     nkf_char ret = c;
4024     if (0x7f <= c && c <= 0x88){
4025         ret = c + (0x75 - 0x7f);
4026     }else if (0x89 <= c && c <= 0x92){
4027         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4028     }
4029     return ret;
4030 }
4031 #endif /* X0212_ENABLE */
4032
/*
 * Convert an internal EUC pair (c2, c1) to a Shift_JIS byte pair.
 *
 *   p2, p1 - optional outputs for the first and second Shift_JIS byte
 *
 * G3-prefixed input (is_eucg3) is handled by arithmetic when x0213_f is
 * set, or, with X0212_ENABLE, by the x0212_shiftjis table with
 * x0212_shift() as the fallback when the table has no entry.
 *
 * Returns 0 on success and 1 when the pair cannot be converted.
 */
4033 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4034 {
4035     nkf_char ndx;
4036     if (is_eucg3(c2)){
4037         ndx = c2 & 0xff;
4038         if (x0213_f){
            /* only rows 0x21..0x2F and 0x6E..0x7E are accepted here */
4039             if((0x21 <= ndx && ndx <= 0x2F)){
4040                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4041                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4042                 return 0;
4043             }else if(0x6E <= ndx && ndx <= 0x7E){
4044                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4045                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4046                 return 0;
4047             }
4048             return 1;
4049         }
4050 #ifdef X0212_ENABLE
4051         else if(nkf_isgraph(ndx)){
4052             nkf_char val = 0;
4053             const unsigned short *ptr;
4054 #if 0
4055             extern const unsigned short *const x0212_shiftjis[];
4056 #endif
            /* NOTE(review): the column index below is not range-checked here -- confirm callers guarantee c1 */
4057             ptr = x0212_shiftjis[ndx - 0x21];
4058             if (ptr){
4059                 val = ptr[(c1 & 0x7f) - 0x21];
4060             }
4061             if (val){
4062                 c2 = val >> 8;
4063                 c1 = val & 0xff;
4064                 if (p2) *p2 = c2;
4065                 if (p1) *p1 = c1;
4066                 return 0;
4067             }
            /* no table entry: relocate the row and fall through to the generic formula */
4068             c2 = x0212_shift(c2);
4069         }
4070 #endif /* X0212_ENABLE */
4071     }
    /* rows above 0x7F (including an unconverted G3 value) cannot be encoded */
4072     if(0x7F < c2) return 1;
    /* two rows share one lead byte; odd and even rows use different trail-byte offsets */
4073     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4074     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4075     return 0;
4076 }
4077
/*
 * Shift_JIS output converter: writes the character (c2, c1) through
 * o_putc.
 *
 * With NUMCHAR_OPTION, a Unicode capsule in c1 is first converted with
 * w16e_conv().  If it is still a capsule afterwards it is either written
 * as a user-defined character (UCS_MAP_CP932, U+E000..U+E757) or handed
 * to encode_fallback, when one is set, and dropped.
 *
 * c2 then selects the output form: EOF, ASCII, X0201, ISO8859_1, G3
 * (converted with e2s_conv(), dropped on failure) or an ordinary pair
 * converted with e2s_conv().
 */
4078 void s_oconv(nkf_char c2, nkf_char c1)
4079 {
4080 #ifdef NUMCHAR_OPTION
4081     if (c2 == 0 && is_unicode_capsule(c1)){
4082         w16e_conv(c1, &c2, &c1);
        /* still a capsule: the table lookup did not produce a pair */
4083         if (c2 == 0 && is_unicode_capsule(c1)){
4084             c2 = c1 & VALUE_MASK;
4085             if (ms_ucs_map_f == UCS_MAP_CP932 &&
4086                 0xE000 <= c2 && c2 <= 0xE757) {
4087                 /* CP932 UDC */
                /* offset from 0xE000, 188 cells per lead byte starting at 0xF0 */
4088                 c1 &= 0xFFF;
4089                 c2 = c1 / 188 + 0xF0;
4090                 c1 = c1 % 188;
                /* trail bytes start at 0x40 and skip 0x7F */
4091                 c1 += 0x40 + (c1 > 0x3e);
4092                 (*o_putc)(c2);
4093                 (*o_putc)(c1);
4094                 return;
4095             } else {
4096                 if(encode_fallback)(*encode_fallback)(c1);
4097                 return;
4098             }
4099         }
4100     }
4101 #endif
4102     if (c2 == EOF) {
4103         (*o_putc)(EOF);
4104         return;
4105     } else if (c2 == 0) {
4106         output_mode = ASCII;
4107         (*o_putc)(c1);
4108     } else if (c2 == X0201) {
4109         output_mode = SHIFT_JIS;
4110         (*o_putc)(c1|0x80);
4111     } else if (c2 == ISO8859_1) {
4112         output_mode = ISO8859_1;
4113         (*o_putc)(c1 | 0x080);
4114 #ifdef X0212_ENABLE
4115     } else if (is_eucg3(c2)){
4116         output_mode = SHIFT_JIS;
        /* nothing is written when e2s_conv() fails */
4117         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4118             (*o_putc)(c2);
4119             (*o_putc)(c1);
4120         }
4121 #endif
4122     } else {
4123         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4124             set_iconv(FALSE, 0);
4125             return; /* too late to rescue this char */
4126         }
4127         output_mode = SHIFT_JIS;
4128         e2s_conv(c2, c1, &c2, &c1);
4129
4130 #ifdef SHIFTJIS_CP932
        /* optional remap of lead bytes in the cp932inv range; zero entries leave the pair unchanged */
4131         if (cp932inv_f
4132             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4133 #if 0
4134             extern const unsigned short cp932inv[2][189];
4135 #endif
4136             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4137             if (c){
4138                 c2 = c >> 8;
4139                 c1 = c & 0xff;
4140             }
4141         }
4142 #endif /* SHIFTJIS_CP932 */
4143
4144         (*o_putc)(c2);
        /* a non-zero prefix_table entry for the trail byte is written before it */
4145         if (prefix_table[(unsigned char)c1]){
4146             (*o_putc)(prefix_table[(unsigned char)c1]);
4147         }
4148         (*o_putc)(c1);
4149     }
4150 }
4151
/*
 * Escape-sequence based (7-bit JIS) output converter: writes the
 * character (c2, c1) through o_putc, emitting a designating escape
 * sequence whenever the required character set differs from
 * output_mode.
 *
 * With NUMCHAR_OPTION, a Unicode capsule in c1 is first converted with
 * w16e_conv().  If it is still a capsule afterwards it is either mapped
 * to a user-defined-character row (UCS_MAP_CP932, U+E000..U+E757) or
 * handed to encode_fallback, when one is set, and dropped.
 *
 * At EOF the output is switched back with ESC ( ascii_intro unless
 * output_mode is already ASCII or ISO8859_1.
 */
4152 void j_oconv(nkf_char c2, nkf_char c1)
4153 {
4154 #ifdef NUMCHAR_OPTION
4155     if (c2 == 0 && is_unicode_capsule(c1)){
4156         w16e_conv(c1, &c2, &c1);
        /* still a capsule: the table lookup did not produce a pair */
4157         if (c2 == 0 && is_unicode_capsule(c1)){
4158             c2 = c1 & VALUE_MASK;
4159             if (ms_ucs_map_f == UCS_MAP_CP932 &&
4160                 0xE000 <= c2 && c2 <= 0xE757) {
4161                 /* CP5022x UDC */
                /* offset from 0xE000, rows of 94 cells starting at row 0x7F */
4162                 c1 &= 0xFFF;
4163                 c2 = 0x7F + c1 / 94;
4164                 c1 = 0x21 + c1 % 94;
4165             } else {
4166                 if (encode_fallback) (*encode_fallback)(c1);
4167                 return;
4168             }
4169         }
4170     }
4171 #endif
4172     if (c2 == EOF) {
4173         if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4174             (*o_putc)(ESC);
4175             (*o_putc)('(');
4176             (*o_putc)(ascii_intro);
4177             output_mode = ASCII;
4178         }
4179         (*o_putc)(EOF);
4180 #ifdef X0212_ENABLE
4181     } else if (is_eucg3(c2)){
        /* G3 character: designate X0213_2 or X0212 with ESC $ ( <final byte> */
4182         if(x0213_f){
4183             if(output_mode!=X0213_2){
4184                 output_mode = X0213_2;
4185                 (*o_putc)(ESC);
4186                 (*o_putc)('$');
4187                 (*o_putc)('(');
4188                 (*o_putc)(X0213_2&0x7F);
4189             }
4190         }else{
4191             if(output_mode!=X0212){
4192                 output_mode = X0212;
4193                 (*o_putc)(ESC);
4194                 (*o_putc)('$');
4195                 (*o_putc)('(');
4196                 (*o_putc)(X0212&0x7F);
4197             }
4198         }
4199         (*o_putc)(c2 & 0x7f);
4200         (*o_putc)(c1);
4201 #endif
4202     } else if (c2==X0201) {
        /* X0201 is designated with ESC ( I */
4203         if (output_mode!=X0201) {
4204             output_mode = X0201;
4205             (*o_putc)(ESC);
4206             (*o_putc)('(');
4207             (*o_putc)('I');
4208         }
4209         (*o_putc)(c1);
4210     } else if (c2==ISO8859_1) {
4211             /* iso8859 introduction, or 8th bit on */
4212             /* Can we convert in 7bit form using ESC-'-'-A ? 
4213                Is this popular? */
4214         output_mode = ISO8859_1;
4215         (*o_putc)(c1|0x80);
4216     } else if (c2 == 0) {
4217         if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4218             (*o_putc)(ESC);
4219             (*o_putc)('(');
4220             (*o_putc)(ascii_intro);
4221             output_mode = ASCII;
4222         }
4223         (*o_putc)(c1);
4224     } else {
        /* drop pairs outside the accepted range; CP932 mode allows rows up to 0x92 for the UDC mapping above */
4225         if(ms_ucs_map_f == UCS_MAP_CP932
4226            ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
4227            : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
4228         if(x0213_f){
4229             if (output_mode!=X0213_1) {
4230                 output_mode = X0213_1;
4231                 (*o_putc)(ESC);
4232                 (*o_putc)('$');
4233                 (*o_putc)('(');
4234                 (*o_putc)(X0213_1&0x7F);
4235             }
4236         }else if (output_mode != X0208) {
            /* X0208 is designated with ESC $ kanji_intro */
4237             output_mode = X0208;
4238             (*o_putc)(ESC);
4239             (*o_putc)('$');
4240             (*o_putc)(kanji_intro);
4241         }
4242         (*o_putc)(c2);
4243         (*o_putc)(c1);
4244     }
4245 }
4246
/*
 * Output stage that calls mime_prechar(c2, c1) and then forwards the
 * same pair to the converter stored in o_base64conv.
 */
4247 void base64_conv(nkf_char c2, nkf_char c1)
4248 {
4249     mime_prechar(c2, c1);
4250     (*o_base64conv)(c2,c1);
4251 }
4252
4253
4254 static nkf_char broken_buf[3];