OSDN Git Service

* Pass through SI/SO/ESC when input is EUC [nkf-dev#39]
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B 
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program 
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.  
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30 ** About UTF-8 support
31 **    This version can be dropped in as a replacement for the previous nkf.
32 **    When started as, e.g., "nkf -e", input that auto-detection judges to be UTF-8
33 **    is converted to euc-jp as is.
34 **
35 **    It is still quite likely to contain bugs
36 **    (especially in auto-detection, mixed encodings and error handling).
37 **
38 **    If you find any problem, please report it to
39 **        E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.105 2006/08/23 17:30:32 naruse Exp $ */
43 #define NKF_VERSION "2.0.8"
44 #define NKF_RELEASE_DATE "2006-08-24"
45 #include "config.h"
46 #include "utf8tbl.h"
47
48 #define COPY_RIGHT \
49     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50     "Copyright (C) 2002-2006 Kono, Furukawa, Naruse, mastodon"
51
52
53 /*
54 **
55 **
56 **
57 ** USAGE:       nkf [flags] [file] 
58 **
59 ** Flags:
60 ** b    Output is buffered             (DEFAULT)
61 ** u    Output is unbuffered
62 **
63 ** t    no operation
64 **
65 ** j    Output code is JIS 7 bit        (DEFAULT SELECT) 
66 ** s    Output code is MS Kanji         (DEFAULT SELECT) 
67 ** e    Output code is AT&T JIS         (DEFAULT SELECT) 
68 ** w    Output code is UTF-8            (DEFAULT SELECT) 
69 ** l    Output code is JIS 7bit and ISO8859-1 Latin-1
70 **
71 ** m    MIME conversion for ISO-2022-JP
72 ** I    Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M    MIME output conversion 
76 **
77 ** r  {de/en}crypt ROT13/47
78 **
79 ** v  display Version
80 **
81 ** T  Text mode output        (for MS-DOS)
82 **
83 ** x    Do not convert X0201 kana into X0208
84 ** Z    Convert X0208 alphabet to ASCII
85 **
86 ** f60  fold option
87 **
88 ** m    MIME decode
89 ** B    try to fix broken JIS, missing Escape
90 ** B[1-9]  broken level
91 **
92 ** O   Output to 'nkf.out' file or last file name
93 ** d   Delete \r in line feed 
94 ** c   Add \r in line feed 
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
97 **
98 **/
99
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
101 #define MSDOS
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
103 #define __WIN32__
104 #endif
105 #endif
106
107 #ifdef PERL_XS
108 #undef OVERWRITE
109 #endif
110
111 #ifndef PERL_XS
112 #include <stdio.h>
113 #endif
114
115 #include <stdlib.h>
116 #include <string.h>
117
118 #if defined(MSDOS) || defined(__OS2__)
119 #include <fcntl.h>
120 #include <io.h>
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
123 #endif
124 #endif
125
126 #ifdef MSDOS
127 #ifdef LSI_C
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
134 #endif
135 #else /* UNIX */
136 #define setbinmode(fp)
137 #endif
138
139 #if defined(__DJGPP__)
140 void  djgpp_setbinmode(FILE *fp)
141 {
142     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
143     int fd, m;
144     fd = fileno(fp);
145     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146     __file_handle_set(fd, m);
147 }
148 #endif
149
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
152 #else /* BSD */
153 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
154 #endif
155
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
158 #define         EASYWIN
159 #ifndef __WIN16__
160 #define __WIN16__
161 #endif
162 #include <windows.h>
163 #endif
164
165 #ifdef OVERWRITE
166 /* added by satoru@isoternet.org */
167 #if defined(__EMX__)
168 #include <sys/types.h>
169 #endif
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
172 #include <unistd.h>
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
175 #else
176 #include <utime.h>
177 #endif
178 #else /* defined(MSDOS) */
179 #ifdef __WIN32__
180 #ifdef __BORLANDC__ /* BCC32 */
181 #include <utime.h>
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
189 #include <utime.h>
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
192 #endif
193 #endif
194 #endif
195
196 #define         FALSE   0
197 #define         TRUE    1
198
199 /* state of output_mode and input_mode  
200
201    c2           0 means ASCII
202                 X0201
203                 ISO8859_1
204                 X0208
205                 EOF      all termination
206    c1           32bit data
207
208  */
209
210 #define         ASCII           0
211 #define         X0208           1
212 #define         X0201           2
213 #define         ISO8859_1       8
214 #define         NO_X0201        3
215 #define         X0212      0x2844
216 #define         X0213_1    0x284F
217 #define         X0213_2    0x2850
218
219 /* Input Assumption */
220
221 #define         JIS_INPUT       4
222 #define         EUC_INPUT      16
223 #define         SJIS_INPUT      5
224 #define         LATIN1_INPUT    6
225 #define         FIXED_MIME      7
226 #define         STRICT_MIME     8
227
228 /* MIME ENCODE */
229
230 #define         ISO2022JP       9
231 #define         JAPANESE_EUC   10
232 #define         SHIFT_JIS      11
233
234 #define         UTF8           12
235 #define         UTF8_INPUT     13
236 #define         UTF16BE_INPUT  14
237 #define         UTF16LE_INPUT  15
238
239 #define         WISH_TRUE      15
240
241 /* ASCII CODE */
242
#define         BS      0x08
#define         TAB     0x09
#define         NL      0x0a
#define         CR      0x0d
#define         ESC     0x1b
#define         SPACE   0x20
#define         AT      0x40
#define         SSP     0xa0
#define         DEL     0x7f
#define         SI      0x0f
#define         SO      0x0e
#define         SSO     0x8e
#define         SS3     0x8f

/*
 * Character classification helpers for the ASCII range.
 *
 * Every use of the macro parameter is parenthesized, so an expression
 * argument such as `c & 0x7f` or `hi | lo` is classified as a whole instead
 * of being re-associated with the comparison operators inside the macro.
 * The argument is still evaluated more than once: do not pass expressions
 * with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; any other character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* True when the high byte of c2, taken as a 16-bit value, equals SS3 (0x8f). */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
275
/*
 * Byte ranges for the CP932 extension tables (presumably Shift_JIS first-byte
 * values -- confirm against the conversion routines that use them).
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The parameter is parenthesized so expression arguments (e.g. `c & 0xff`)
 * are tested as a whole; it is evaluated twice, so avoid side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
281
282 #define         HOLD_SIZE       1024
283 #if defined(INT_IS_SHORT)
284 #define         IOBUF_SIZE      2048
285 #else
286 #define         IOBUF_SIZE      16384
287 #endif
288
289 #define         DEFAULT_J       'B'
290 #define         DEFAULT_R       'B'
291
292 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
293 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
294
295 #define         RANGE_NUM_MAX   18
296 #define         GETA1   0x22
297 #define         GETA2   0x2e
298
299
300 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
301 #define sizeof_euc_to_utf8_1byte 94
302 #define sizeof_euc_to_utf8_2bytes 94
303 #define sizeof_utf8_to_euc_C2 64
304 #define sizeof_utf8_to_euc_E5B8 64
305 #define sizeof_utf8_to_euc_2bytes 112
306 #define sizeof_utf8_to_euc_3bytes 16
307 #endif
308
309 /* MIME preprocessor */
310
311 #ifdef EASYWIN /*Easy Win */
312 extern POINT _BufferSize;
313 #endif
314
315 struct input_code{ /* one candidate input encoding; input_code_list holds one entry per candidate */
316     char *name; /* encoding label, e.g. "EUC-JP" or "Shift_JIS" */
317     nkf_char stat; /* presumably the state of status_func's byte-by-byte check -- confirm in e_status/s_status */
318     nkf_char score; /* presumably the detection score handled by set_code_score/clr_code_score -- confirm */
319     nkf_char index; /* presumably the number of bytes currently held in buf -- confirm in status_push_ch */
320     nkf_char buf[3]; /* small per-encoding byte buffer (up to 3 values) */
321     void (*status_func)(struct input_code *, nkf_char); /* per-encoding status callback (e_status, s_status, ...) */
322     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter for this encoding (e_iconv, s_iconv, ...) */
323     int _file_stat; /* NOTE(review): purpose not visible in this part of the file */
324 };
325
326 static char *input_codename = "";
327
328 #ifndef PERL_XS
329 static const char *CopyRight = COPY_RIGHT;
330 #endif
331 #if !defined(PERL_XS) && !defined(WIN32DLL)
332 static  nkf_char     noconvert(FILE *f);
333 #endif
334 static  void    module_connection(void);
335 static  nkf_char     kanji_convert(FILE *f);
336 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
337 static  nkf_char     push_hold_buf(nkf_char c2);
338 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
339 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
340 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
342 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
343 /* UCS Mapping
344  * 0: Shift_JIS, eucJP-ascii
345  * 1: eucJP-ms
346  * 2: CP932, CP51932
347  */
348 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_MS    1
350 #define UCS_MAP_CP932 2
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
352 #endif
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static  int     no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static  int     ignore_zwnbsp_f = TRUE;
358 static  int     no_best_fit_chars_f = FALSE;
359 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
360 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
361 static  void    encode_fallback_html(nkf_char c);
362 static  void    encode_fallback_xml(nkf_char c);
363 static  void    encode_fallback_java(nkf_char c);
364 static  void    encode_fallback_perl(nkf_char c);
365 static  void    encode_fallback_subchar(nkf_char c);
366 static  void    (*encode_fallback)(nkf_char c) = NULL;
367 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
368 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
369 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
370 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
371 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
372 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
373 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
374 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
375 #endif
376 #ifdef UTF8_OUTPUT_ENABLE
377 static  int     unicode_bom_f= 0;   /* Output Unicode BOM */
378 static  int     w_oconv16_LE = 0;   /* utf-16 little endian */
379 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
380 static  void    w_oconv(nkf_char c2,nkf_char c1);
381 static  void    w_oconv16(nkf_char c2,nkf_char c1);
382 #endif
383 static  void    e_oconv(nkf_char c2,nkf_char c1);
384 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
385 static  void    s_oconv(nkf_char c2,nkf_char c1);
386 static  void    j_oconv(nkf_char c2,nkf_char c1);
387 static  void    fold_conv(nkf_char c2,nkf_char c1);
388 static  void    cr_conv(nkf_char c2,nkf_char c1);
389 static  void    z_conv(nkf_char c2,nkf_char c1);
390 static  void    rot_conv(nkf_char c2,nkf_char c1);
391 static  void    hira_conv(nkf_char c2,nkf_char c1);
392 static  void    base64_conv(nkf_char c2,nkf_char c1);
393 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
394 static  void    no_connection(nkf_char c2,nkf_char c1);
395 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
396
397 static  void    code_score(struct input_code *ptr);
398 static  void    code_status(nkf_char c);
399
400 static  void    std_putc(nkf_char c);
401 static  nkf_char     std_getc(FILE *f);
402 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
403
404 static  nkf_char     broken_getc(FILE *f);
405 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
406
407 static  nkf_char     mime_begin(FILE *f);
408 static  nkf_char     mime_getc(FILE *f);
409 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
410
411 static  void    switch_mime_getc(void);
412 static  void    unswitch_mime_getc(void);
413 static  nkf_char     mime_begin_strict(FILE *f);
414 static  nkf_char     mime_getc_buf(FILE *f);
415 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
416 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
417
418 static  nkf_char     base64decode(nkf_char c);
419 static  void    mime_prechar(nkf_char c2, nkf_char c1);
420 static  void    mime_putc(nkf_char c);
421 static  void    open_mime(nkf_char c);
422 static  void    close_mime(void);
423 static  void    eof_mime(void);
424 static  void    mimeout_addchar(nkf_char c);
425 #ifndef PERL_XS
426 static  void    usage(void);
427 static  void    version(void);
428 #endif
429 static  void    options(unsigned char *c);
430 #if defined(PERL_XS) || defined(WIN32DLL)
431 static  void    reinit(void);
432 #endif
433
434 /* buffers */
435
436 #if !defined(PERL_XS) && !defined(WIN32DLL)
437 static unsigned char   stdibuf[IOBUF_SIZE];
438 static unsigned char   stdobuf[IOBUF_SIZE];
439 #endif
440 static unsigned char   hold_buf[HOLD_SIZE*2];
441 static int             hold_count;
442
443 /* MIME preprocessor fifo */
444
445 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
446 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)   
447 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
448 static unsigned char           mime_buf[MIME_BUF_SIZE];
449 static unsigned int            mime_top = 0;
450 static unsigned int            mime_last = 0;  /* decoded */
451 static unsigned int            mime_input = 0; /* undecoded */
452 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
453
454 /* flags */
455 static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
456 static int             estab_f = FALSE;
457 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
458 static int             binmode_f = TRUE;       /* binary mode */
459 static int             rot_f = FALSE;          /* ROT13/47 mode */
460 static int             hira_f = FALSE;          /* hira/kata conversion */
461 static int             input_f = FALSE;        /* non fixed input code  */
462 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
463 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
464 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
465 static int             mimebuf_f = FALSE;      /* MIME buffered input */
466 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
467 static int             iso8859_f = FALSE;      /* ISO8859 through */
468 static int             mimeout_f = FALSE;       /* base64 mode */
469 #if defined(MSDOS) || defined(__OS2__) 
470 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
471 #else
472 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
473 #endif
474 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
475
476 #ifdef UNICODE_NORMALIZATION
477 static int nfc_f = FALSE;
478 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
479 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
480 static nkf_char nfc_getc(FILE *f);
481 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
482 #endif
483
484 #ifdef INPUT_OPTION
485 static int cap_f = FALSE;
486 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
487 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
488 static nkf_char cap_getc(FILE *f);
489 static nkf_char cap_ungetc(nkf_char c,FILE *f);
490
491 static int url_f = FALSE;
492 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
493 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
494 static nkf_char url_getc(FILE *f);
495 static nkf_char url_ungetc(nkf_char c,FILE *f);
496 #endif
497
/*
 * NKF_INT32_C(n): spell an integer constant so that it is at least 32 bits
 * wide; when INT_IS_SHORT is defined the constant gets an `L` suffix.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/* A character value is split into a class tag (top 8 bits) and a 24-bit payload. */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UTF16     NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * is_unicode_capsule(c): the class tag of c is CLASS_UTF16.
 * is_unicode_bmp(c):     the 24-bit payload of c is at most 0xFFFF.
 * The parameter is parenthesized so that expression arguments such as
 * `CLASS_UTF16 | code` are masked as a whole.
 */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UTF16)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
510
511 #ifdef NUMCHAR_OPTION
512 static int numchar_f = FALSE;
513 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc (numchar_getc) */
514 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
515 static nkf_char numchar_getc(FILE *f);
516 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
517 #endif
518
519 #ifdef CHECK_OPTION
520 static int noout_f = FALSE;
521 static void no_putc(nkf_char c);
522 static nkf_char debug_f = FALSE;
523 static void debug(const char *str);
524 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
525 #endif
526
527 static int guess_f = FALSE;
528 #if !defined PERL_XS
529 static  void    print_guessed_code(char *filename);
530 #endif
531 static  void    set_input_codename(char *codename);
532 static int is_inputcode_mixed = FALSE;
533 static int is_inputcode_set   = FALSE;
534
535 #ifdef EXEC_IO
536 static int exec_f = 0;
537 #endif
538
539 #ifdef SHIFTJIS_CP932
540 /* invert IBM extended characters to others */
541 static int cp51932_f = TRUE;
542
543 /* invert NEC-selected IBM extended characters to IBM extended characters */
544 static int cp932inv_f = TRUE;
545
546 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
547 #endif /* SHIFTJIS_CP932 */
548
549 #ifdef X0212_ENABLE
550 static int x0212_f = FALSE;
551 static nkf_char x0212_shift(nkf_char c);
552 static nkf_char x0212_unshift(nkf_char c);
553 #endif
554 static int x0213_f = FALSE;
555
556 static unsigned char prefix_table[256];
557
558 static void set_code_score(struct input_code *ptr, nkf_char score);
559 static void clr_code_score(struct input_code *ptr, nkf_char score);
560 static void status_disable(struct input_code *ptr);
561 static void status_push_ch(struct input_code *ptr, nkf_char c);
562 static void status_clear(struct input_code *ptr);
563 static void status_reset(struct input_code *ptr);
564 static void status_reinit(struct input_code *ptr);
565 static void status_check(struct input_code *ptr, nkf_char c);
566 static void e_status(struct input_code *, nkf_char);
567 static void s_status(struct input_code *, nkf_char);
568
569 #ifdef UTF8_INPUT_ENABLE
570 static void w_status(struct input_code *, nkf_char);
571 static void w16_status(struct input_code *, nkf_char);
572 static int             utf16_mode = UTF16BE_INPUT;
573 #endif
574
575 struct input_code input_code_list[] = { /* fields: name, stat, score, index, buf, status_func, iconv_func, _file_stat */
576     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
577     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
578 #ifdef UTF8_INPUT_ENABLE
579     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
580     {"UTF-16",    0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
581 #endif
582     {0} /* all-zero entry (null name); presumably the end-of-list sentinel -- confirm in the loops over this table */
583 };
584
585 static int              mimeout_mode = 0;
586 static int              base64_count = 0;
587
588 /* X0208 -> ASCII converter */
589
590 /* fold parameter */
591 static int             f_line = 0;    /* chars in line */
592 static int             f_prev = 0;
593 static int             fold_preserve_f = FALSE; /* preserve new lines */
594 static int             fold_f  = FALSE;
595 static int             fold_len  = 0;
596
597 /* options */
598 static unsigned char   kanji_intro = DEFAULT_J;
599 static unsigned char   ascii_intro = DEFAULT_R;
600
601 /* Folding */
602
603 #define FOLD_MARGIN  10
604 #define DEFAULT_FOLD 60
605
606 static int             fold_margin  = FOLD_MARGIN;
607
608 /* converters */
609
610 #ifdef DEFAULT_CODE_JIS
611 #   define  DEFAULT_CONV j_oconv
612 #endif
613 #ifdef DEFAULT_CODE_SJIS
614 #   define  DEFAULT_CONV s_oconv
615 #endif
616 #ifdef DEFAULT_CODE_EUC
617 #   define  DEFAULT_CONV e_oconv
618 #endif
619 #ifdef DEFAULT_CODE_UTF8
620 #   define  DEFAULT_CONV w_oconv
621 #endif
622
623 /* process default */
624 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
625
626 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
627 /* s_iconv or oconv */
628 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
629
630 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
631 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
632 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
633 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
634 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
635 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
636 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
637
638 /* static redirections */
639
640 static  void   (*o_putc)(nkf_char c) = std_putc; /* general output */
641
642 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
643 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
644
645 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc (presumably broken_getc) */
646 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
647
648 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
649
650 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
651 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
652
653 /* for strict mime */
654 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
655 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
656
657 /* Global states */
658 static int output_mode = ASCII,    /* output kanji mode */
659            input_mode =  ASCII,    /* input kanji mode */
660            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
661 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
662
663 /* X0201 / X0208 conversion tables */
664
665 /* X0201 kana conversion table */
666 /* 90-9F A0-DF */
667 static const
668 unsigned char cv[]= {
669     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
670     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
671     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
672     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
673     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
674     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
675     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
676     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
677     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
678     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
679     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
680     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
681     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
682     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
683     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
684     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
685     0x00,0x00};
686
687
688 /* X0201 kana conversion table for dakuten */
689 /* 90-9F A0-DF */
690 static const
691 unsigned char dv[]= { 
692     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
696     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
697     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
698     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
699     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
700     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
701     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
702     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
703     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
704     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
705     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
706     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
707     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
708     0x00,0x00};
709
710 /* X0201 kana conversion table for han-dakuten */
711 /* 90-9F A0-DF */
712 static const
713 unsigned char ev[]= { 
714     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
716     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
717     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
718     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
719     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
720     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
724     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
725     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
730     0x00,0x00};
731
732
733 /* X0208 kigou conversion table */
734 /* 0x8140 - 0x819e */
735 static const
736 unsigned char fv[] = {
737
738     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
739     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
740     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
741     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
742     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
743     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
744     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
745     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
746     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
747     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
748     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
749     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
750 } ;
751
752
753 #define    CRLF      1
754
755 static int             file_out_f = FALSE;
756 #ifdef OVERWRITE
757 static int             overwrite_f = FALSE;
758 static int             preserve_time_f = FALSE;
759 static int             backup_f = FALSE;
760 static char            *backup_suffix = "";
761 static char *get_backup_filename(const char *suffix, const char *filename);
762 #endif
763
764 static int             crmode_f = 0;   /* CR, NL, CRLF */
765 #ifdef EASYWIN /*Easy Win */
766 static int             end_check;
767 #endif /*Easy Win */
768
769 #define STD_GC_BUFSIZE (256)
770 nkf_char std_gc_buf[STD_GC_BUFSIZE];
771 nkf_char std_gc_ndx;
772
773 #ifdef WIN32DLL
774 #include "nkf32dll.c"
775 #elif defined(PERL_XS)
776 #else /* WIN32DLL */
777 int main(int argc, char **argv)
778 {
779     FILE  *fin;
780     unsigned char  *cp;
781
782     char *outfname = NULL;
783     char *origfname;
784
785 #ifdef EASYWIN /*Easy Win */
786     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
787 #endif
788
789     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
790         cp = (unsigned char *)*argv;
791         options(cp);
792 #ifdef EXEC_IO
793         if (exec_f){
794             int fds[2], pid;
795             if (pipe(fds) < 0 || (pid = fork()) < 0){
796                 abort();
797             }
798             if (pid == 0){
799                 if (exec_f > 0){
800                     close(fds[0]);
801                     dup2(fds[1], 1);
802                 }else{
803                     close(fds[1]);
804                     dup2(fds[0], 0);
805                 }
806                 execvp(argv[1], &argv[1]);
807             }
808             if (exec_f > 0){
809                 close(fds[1]);
810                 dup2(fds[0], 0);
811             }else{
812                 close(fds[0]);
813                 dup2(fds[1], 1);
814             }
815             argc = 0;
816             break;
817         }
818 #endif
819     }
820     if(x0201_f == WISH_TRUE)
821          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
822
823     if (binmode_f == TRUE)
824 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
825     if (freopen("","wb",stdout) == NULL) 
826         return (-1);
827 #else
828     setbinmode(stdout);
829 #endif
830
831     if (unbuf_f)
832       setbuf(stdout, (char *) NULL);
833     else
834       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
835
836     if (argc == 0) {
837       if (binmode_f == TRUE)
838 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
839       if (freopen("","rb",stdin) == NULL) return (-1);
840 #else
841       setbinmode(stdin);
842 #endif
843       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
844       if (nop_f)
845           noconvert(stdin);
846       else {
847           kanji_convert(stdin);
848           if (guess_f) print_guessed_code(NULL);
849       }
850     } else {
851       int nfiles = argc;
852       while (argc--) {
853             is_inputcode_mixed = FALSE;
854             is_inputcode_set   = FALSE;
855             input_codename = "";
856 #ifdef CHECK_OPTION
857             iconv_for_check = 0;
858 #endif
859           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
860               perror(*--argv);
861               return(-1);
862           } else {
863 #ifdef OVERWRITE
864               int fd = 0;
865               int fd_backup = 0;
866 #endif
867
868 /* reopen file for stdout */
869               if (file_out_f == TRUE) {
870 #ifdef OVERWRITE
871                   if (overwrite_f){
872                       outfname = malloc(strlen(origfname)
873                                         + strlen(".nkftmpXXXXXX")
874                                         + 1);
875                       if (!outfname){
876                           perror(origfname);
877                           return -1;
878                       }
879                       strcpy(outfname, origfname);
880 #ifdef MSDOS
881                       {
882                           int i;
883                           for (i = strlen(outfname); i; --i){
884                               if (outfname[i - 1] == '/'
885                                   || outfname[i - 1] == '\\'){
886                                   break;
887                               }
888                           }
889                           outfname[i] = '\0';
890                       }
891                       strcat(outfname, "ntXXXXXX");
892                       mktemp(outfname);
893                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
894                                 S_IREAD | S_IWRITE);
895 #else
896                       strcat(outfname, ".nkftmpXXXXXX");
897                       fd = mkstemp(outfname);
898 #endif
899                       if (fd < 0
900                           || (fd_backup = dup(fileno(stdout))) < 0
901                           || dup2(fd, fileno(stdout)) < 0
902                           ){
903                           perror(origfname);
904                           return -1;
905                       }
906                   }else
907 #endif
908                   if(argc == 1 ) {
909                       outfname = *argv++;
910                       argc--;
911                   } else {
912                       outfname = "nkf.out";
913                   }
914
915                   if(freopen(outfname, "w", stdout) == NULL) {
916                       perror (outfname);
917                       return (-1);
918                   }
919                   if (binmode_f == TRUE) {
920 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
921                       if (freopen("","wb",stdout) == NULL) 
922                            return (-1);
923 #else
924                       setbinmode(stdout);
925 #endif
926                   }
927               }
928               if (binmode_f == TRUE)
929 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
930                  if (freopen("","rb",fin) == NULL) 
931                     return (-1);
932 #else
933                  setbinmode(fin);
934 #endif 
935               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
936               if (nop_f)
937                   noconvert(fin);
938               else {
939                   char *filename = NULL;
940                   kanji_convert(fin);
941                   if (nfiles > 1) filename = origfname;
942                   if (guess_f) print_guessed_code(filename);
943               }
944               fclose(fin);
945 #ifdef OVERWRITE
946               if (overwrite_f) {
947                   struct stat     sb;
948 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
949                   time_t tb[2];
950 #else
951                   struct utimbuf  tb;
952 #endif
953
954                   fflush(stdout);
955                   close(fd);
956                   if (dup2(fd_backup, fileno(stdout)) < 0){
957                       perror("dup2");
958                   }
959                   if (stat(origfname, &sb)) {
960                       fprintf(stderr, "Can't stat %s\n", origfname);
961                   }
962                   /* restore the original file's permissions */
963                   if (chmod(outfname, sb.st_mode)) {
964                       fprintf(stderr, "Can't set permission %s\n", outfname);
965                   }
966
967                   /* restore the original file's timestamps */
968                     if(preserve_time_f){
969 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
970                         tb[0] = tb[1] = sb.st_mtime;
971                         if (utime(outfname, tb)) {
972                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
973                         }
974 #else
975                         tb.actime  = sb.st_atime;
976                         tb.modtime = sb.st_mtime;
977                         if (utime(outfname, &tb)) {
978                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
979                         }
980 #endif
981                     }
982                     if(backup_f){
983                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
984 #ifdef MSDOS
985                         unlink(backup_filename);
986 #endif
987                         if (rename(origfname, backup_filename)) {
988                             perror(backup_filename);
989                             fprintf(stderr, "Can't rename %s to %s\n",
990                                     origfname, backup_filename);
991                         }
992                     }else{
993 #ifdef MSDOS
994                         if (unlink(origfname)){
995                             perror(origfname);
996                         }
997 #endif
998                     }
999                   if (rename(outfname, origfname)) {
1000                       perror(origfname);
1001                       fprintf(stderr, "Can't rename %s to %s\n",
1002                               outfname, origfname);
1003                   }
1004                   free(outfname);
1005               }
1006 #endif
1007           }
1008       }
1009     }
1010 #ifdef EASYWIN /*Easy Win */
1011     if (file_out_f == FALSE) 
1012         scanf("%d",&end_check);
1013     else 
1014         fclose(stdout);
1015 #else /* for Other OS */
1016     if (file_out_f == TRUE) 
1017         fclose(stdout);
1018 #endif /*Easy Win */
1019     return (0);
1020 }
1021 #endif /* WIN32DLL */
1022
1023 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * suffix   - backup pattern.  If it contains one or more '*', the result
 *            is the pattern with every '*' replaced by `filename`;
 *            otherwise the result is `filename` followed by `suffix`.
 * filename - name of the file that is about to be overwritten.
 *
 * Returns a newly malloc()ed, NUL-terminated string owned by the caller
 * (release it with free()), or NULL -- after reporting through perror() --
 * when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i, j;

    for(i = 0; i < suffix_length; i++){
        if(suffix[i] == '*') asterisk_count++;
    }

    /* Result length: each '*' (one byte of the pattern) expands to the
       whole file name; with no '*' the suffix is appended to the name. */
    if(asterisk_count){
        j = (suffix_length - asterisk_count) + asterisk_count * filename_length;
    }else{
        j = filename_length + suffix_length;
    }

    /* One checked allocation for both cases (+1 for the terminator). */
    backup_filename = malloc(j + 1);
    if (!backup_filename){
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if(asterisk_count){
        /* Copy the pattern, substituting the file name for every '*'.
           j is rebuilt here and ends at the length computed above. */
        for(i = 0, j = 0; i < suffix_length; i++){
            if(suffix[i] == '*'){
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            }else{
                backup_filename[j++] = suffix[i];
            }
        }
    }else{
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1062 #endif
1063
/*
 * Table of long ("--name") options scanned by options().
 *
 * options() compares the text following "--" against each `name` in
 * order.  A `name` ending in '=' denotes an option that takes a value;
 * the value is the text that follows the '='.
 *
 * When `alias` is non-empty, options() continues parsing with the alias
 * string as a sequence of short option letters in place of the long
 * name.  When `alias` is empty (""), options() handles the entry itself
 * by comparing `name` with strcmp().
 *
 * Entries inside #ifdef blocks exist only when that feature is compiled in.
 */
1064 static const
1065 struct {
1066     const char *name;    /* long option text, without the leading "--" */
1067     const char *alias;   /* equivalent short option letters, or "" */
1068 } long_option[] = {
1069     {"ic=", ""},
1070     {"oc=", ""},
1071     {"base64","jMB"},
1072     {"euc","e"},
1073     {"euc-input","E"},
1074     {"fj","jm"},
1075     {"help","v"},
1076     {"jis","j"},
1077     {"jis-input","J"},
1078     {"mac","sLm"},
1079     {"mime","jM"},
1080     {"mime-input","m"},
1081     {"msdos","sLw"},
1082     {"sjis","s"},
1083     {"sjis-input","S"},
1084     {"unix","eLu"},
1085     {"version","V"},
1086     {"windows","sLw"},
1087     {"hiragana","h1"},
1088     {"katakana","h2"},
1089     {"katakana-hiragana","h3"},
1090     {"guess", "g"},
1091     {"cp932", ""},
1092     {"no-cp932", ""},
1093 #ifdef X0212_ENABLE
1094     {"x0212", ""},
1095 #endif
1096 #ifdef UTF8_OUTPUT_ENABLE
1097     {"utf8", "w"},
1098     {"utf16", "w16"},
1099     {"ms-ucs-map", ""},
1100     {"fb-skip", ""},
1101     {"fb-html", ""},
1102     {"fb-xml", ""},
1103     {"fb-perl", ""},
1104     {"fb-java", ""},
1105     {"fb-subchar", ""},
1106     {"fb-subchar=", ""},
1107 #endif
1108 #ifdef UTF8_INPUT_ENABLE
1109     {"utf8-input", "W"},
1110     {"utf16-input", "W16"},
1111     {"no-cp932ext", ""},
1112     {"no-best-fit-chars",""},
1113 #endif
1114 #ifdef UNICODE_NORMALIZATION
1115     {"utf8mac-input", ""},
1116 #endif
1117 #ifdef OVERWRITE
1118     {"overwrite", ""},
1119     {"overwrite=", ""},
1120     {"in-place", ""},
1121     {"in-place=", ""},
1122 #endif
1123 #ifdef INPUT_OPTION
1124     {"cap-input", ""},
1125     {"url-input", ""},
1126 #endif
1127 #ifdef NUMCHAR_OPTION
1128     {"numchar-input", ""},
1129 #endif
1130 #ifdef CHECK_OPTION
1131     {"no-output", ""},
1132     {"debug", ""},
1133 #endif
1134 #ifdef SHIFTJIS_CP932
1135     {"cp932inv", ""},
1136 #endif
1137 #ifdef EXEC_IO
1138     {"exec-in", ""},
1139     {"exec-out", ""},
1140 #endif
1141     {"prefix=", ""},
1142 };
1143
/*
 * Option-parsing state shared across calls to options().  It is set to 1
 * when options() meets a bare "--" (followed by end of string or a space);
 * from then on every call to options() returns immediately without
 * parsing its argument.
 */
1144 static int option_mode = 0;
1145
1146 void options(unsigned char *cp)
1147 {
1148     nkf_char i, j;
1149     unsigned char *p;
1150     unsigned char *cp_back = NULL;
1151     char codeset[32];
1152
1153     if (option_mode==1)
1154         return;
1155     while(*cp && *cp++!='-');
1156     while (*cp || cp_back) {
1157         if(!*cp){
1158             cp = cp_back;
1159             cp_back = NULL;
1160             continue;
1161         }
1162         p = 0;
1163         switch (*cp++) {
1164         case '-':  /* literal options */
1165             if (!*cp || *cp == SPACE) {        /* ignore the rest of arguments */
1166                 option_mode = 1;
1167                 return;
1168             }
1169             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1170                 p = (unsigned char *)long_option[i].name;
1171                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1172                 if (*p == cp[j] || cp[j] == ' '){
1173                     p = &cp[j] + 1;
1174                     break;
1175                 }
1176                 p = 0;
1177             }
1178             if (p == 0) return;
1179             while(*cp && *cp != SPACE && cp++);
1180             if (long_option[i].alias[0]){
1181                 cp_back = cp;
1182                 cp = (unsigned char *)long_option[i].alias;
1183             }else{
1184                 if (strcmp(long_option[i].name, "ic=") == 0){
1185                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1186                         codeset[i] = nkf_toupper(p[i]);
1187                     }
1188                     codeset[i] = 0;
1189                     if(strcmp(codeset, "ISO-2022-JP") == 0 ||
1190                       strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1191                       strcmp(codeset, "CP50220") == 0 ||
1192                       strcmp(codeset, "CP50221") == 0 ||
1193                       strcmp(codeset, "CP50222") == 0 ||
1194                       strcmp(codeset, "ISO-2022-JP-MS") == 0){
1195                         input_f = JIS_INPUT;
1196                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1197                         input_f = JIS_INPUT;
1198 #ifdef X0212_ENABLE
1199                         x0212_f = TRUE;
1200 #endif
1201                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1202                         input_f = JIS_INPUT;
1203 #ifdef X0212_ENABLE
1204                         x0212_f = TRUE;
1205 #endif
1206                         x0213_f = TRUE;
1207                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1208                         input_f = SJIS_INPUT;
1209                         if (x0201_f==NO_X0201) x0201_f=TRUE;
1210                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1211                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1212                              strcmp(codeset, "CP932") == 0 ||
1213                              strcmp(codeset, "MS932") == 0){
1214                         input_f = SJIS_INPUT;
1215                         x0201_f = FALSE;
1216 #ifdef SHIFTJIS_CP932
1217                         cp51932_f = TRUE;
1218 #endif
1219 #ifdef UTF8_OUTPUT_ENABLE
1220                         ms_ucs_map_f = UCS_MAP_CP932;
1221 #endif
1222                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1223                              strcmp(codeset, "EUC-JP") == 0){
1224                         input_f = EUC_INPUT;
1225                     }else if(strcmp(codeset, "CP51932") == 0){
1226                         input_f = EUC_INPUT;
1227                         x0201_f = FALSE;
1228 #ifdef SHIFTJIS_CP932
1229                         cp51932_f = TRUE;
1230 #endif
1231 #ifdef UTF8_OUTPUT_ENABLE
1232                         ms_ucs_map_f = UCS_MAP_CP932;
1233 #endif
1234                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1235                              strcmp(codeset, "EUCJP-MS") == 0 ||
1236                              strcmp(codeset, "EUCJPMS") == 0){
1237                         input_f = EUC_INPUT;
1238                         x0201_f = FALSE;
1239 #ifdef SHIFTJIS_CP932
1240                         cp51932_f = FALSE;
1241 #endif
1242 #ifdef UTF8_OUTPUT_ENABLE
1243                         ms_ucs_map_f = UCS_MAP_MS;
1244 #endif
1245                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1246                              strcmp(codeset, "EUCJP-ASCII") == 0){
1247                         input_f = EUC_INPUT;
1248                         x0201_f = FALSE;
1249 #ifdef SHIFTJIS_CP932
1250                         cp51932_f = FALSE;
1251 #endif
1252 #ifdef UTF8_OUTPUT_ENABLE
1253                         ms_ucs_map_f = UCS_MAP_ASCII;
1254 #endif
1255                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1256                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1257                         input_f = SJIS_INPUT;
1258                         x0213_f = TRUE;
1259 #ifdef SHIFTJIS_CP932
1260                         cp51932_f = FALSE;
1261                         cp932inv_f = FALSE;
1262 #endif
1263                         if (x0201_f==NO_X0201) x0201_f=TRUE;
1264                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1265                              strcmp(codeset, "EUC-JIS-2004") == 0){
1266                         input_f = EUC_INPUT;
1267                         x0201_f = FALSE;
1268                         x0213_f = TRUE;
1269 #ifdef SHIFTJIS_CP932
1270                         cp51932_f = FALSE;
1271                         cp932inv_f = FALSE;
1272 #endif
1273 #ifdef UTF8_INPUT_ENABLE
1274                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1275                              strcmp(codeset, "UTF-8N") == 0 ||
1276                              strcmp(codeset, "UTF-8-BOM") == 0){
1277                         input_f = UTF8_INPUT;
1278 #ifdef UNICODE_NORMALIZATION
1279                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1280                              strcmp(codeset, "UTF-8-MAC") == 0){
1281                         input_f = UTF8_INPUT;
1282                         nfc_f = TRUE;
1283 #endif
1284                     }else if(strcmp(codeset, "UTF-16") == 0){
1285                         input_f = UTF16BE_INPUT;
1286                         utf16_mode = UTF16BE_INPUT;
1287                     }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1288                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1289                         input_f = UTF16BE_INPUT;
1290                         utf16_mode = UTF16BE_INPUT;
1291                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1292                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1293                         input_f = UTF16LE_INPUT;
1294                         utf16_mode = UTF16LE_INPUT;
1295 #endif
1296                     }
1297                     continue;
1298                 }
1299                 if (strcmp(long_option[i].name, "oc=") == 0){
1300                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1301                         codeset[i] = nkf_toupper(p[i]);
1302                     }
1303                     codeset[i] = 0;
1304                     if(strcmp(codeset, "ISO-2022-JP") == 0 ||
1305                        strcmp(codeset, "CP50220") == 0){
1306                         output_conv = j_oconv;
1307                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1308                         output_conv = j_oconv;
1309                         no_cp932ext_f = TRUE;
1310                     }else if(strcmp(codeset, "CP50221") == 0 ||
1311                              strcmp(codeset, "ISO-2022-JP-MS") == 0){
1312                         output_conv = j_oconv;
1313                         x0201_f = FALSE;
1314                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1315                         output_conv = j_oconv;
1316 #ifdef X0212_ENABLE
1317                         x0212_f = TRUE;
1318 #endif
1319 #ifdef SHIFTJIS_CP932
1320                         cp51932_f = FALSE;
1321 #endif
1322                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1323                         output_conv = j_oconv;
1324 #ifdef X0212_ENABLE
1325                         x0212_f = TRUE;
1326 #endif
1327                         x0213_f = TRUE;
1328 #ifdef SHIFTJIS_CP932
1329                         cp51932_f = FALSE;
1330 #endif
1331                     }else if(strcmp(codeset, "ISO-2022-JP-MS") == 0){
1332                         output_conv = j_oconv;
1333                         x0201_f = FALSE;
1334 #ifdef X0212_ENABLE
1335                         x0212_f = TRUE;
1336 #endif
1337 #ifdef SHIFTJIS_CP932
1338                         cp51932_f = FALSE;
1339 #endif
1340                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1341                         output_conv = s_oconv;
1342                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1343                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1344                              strcmp(codeset, "CP932") == 0 ||
1345                              strcmp(codeset, "MS932") == 0){
1346                         output_conv = s_oconv;
1347                         x0201_f = FALSE;
1348 #ifdef SHIFTJIS_CP932
1349                         cp51932_f = TRUE;
1350                         cp932inv_f = TRUE;
1351 #endif
1352 #ifdef UTF8_OUTPUT_ENABLE
1353                         ms_ucs_map_f = UCS_MAP_CP932;
1354 #endif
1355                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1356                              strcmp(codeset, "EUC-JP") == 0){
1357                         output_conv = e_oconv;
1358                     }else if(strcmp(codeset, "CP51932") == 0){
1359                         output_conv = e_oconv;
1360                         x0201_f = FALSE;
1361 #ifdef SHIFTJIS_CP932
1362                         cp51932_f = TRUE;
1363 #endif
1364 #ifdef UTF8_OUTPUT_ENABLE
1365                         ms_ucs_map_f = UCS_MAP_CP932;
1366 #endif
1367                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1368                              strcmp(codeset, "EUCJP-MS") == 0 ||
1369                              strcmp(codeset, "EUCJPMS") == 0){
1370                         output_conv = e_oconv;
1371                         x0201_f = FALSE;
1372 #ifdef X0212_ENABLE
1373                         x0212_f = TRUE;
1374 #endif
1375 #ifdef SHIFTJIS_CP932
1376                         cp51932_f = FALSE;
1377 #endif
1378 #ifdef UTF8_OUTPUT_ENABLE
1379                         ms_ucs_map_f = UCS_MAP_MS;
1380 #endif
1381                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1382                              strcmp(codeset, "EUCJP-ASCII") == 0){
1383                         output_conv = e_oconv;
1384                         x0201_f = FALSE;
1385 #ifdef X0212_ENABLE
1386                         x0212_f = TRUE;
1387 #endif
1388 #ifdef SHIFTJIS_CP932
1389                         cp51932_f = FALSE;
1390 #endif
1391 #ifdef UTF8_OUTPUT_ENABLE
1392                         ms_ucs_map_f = UCS_MAP_ASCII;
1393 #endif
1394                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1395                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1396                         output_conv = s_oconv;
1397                         x0213_f = TRUE;
1398 #ifdef SHIFTJIS_CP932
1399                         cp932inv_f = FALSE;
1400 #endif
1401                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1402                              strcmp(codeset, "EUC-JIS-2004") == 0){
1403                         output_conv = e_oconv;
1404 #ifdef X0212_ENABLE
1405                         x0212_f = TRUE;
1406 #endif
1407                         x0213_f = TRUE;
1408 #ifdef SHIFTJIS_CP932
1409                         cp51932_f = FALSE;
1410 #endif
1411 #ifdef UTF8_OUTPUT_ENABLE
1412                     }else if(strcmp(codeset, "UTF-8") == 0){
1413                         output_conv = w_oconv;
1414                     }else if(strcmp(codeset, "UTF-8N") == 0){
1415                         output_conv = w_oconv;
1416                         unicode_bom_f=1;
1417                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1418                         output_conv = w_oconv;
1419                         unicode_bom_f=2;
1420                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1421                         output_conv = w_oconv16; 
1422                         unicode_bom_f=1;
1423                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1424                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1425                         output_conv = w_oconv16; 
1426                         unicode_bom_f=2;
1427                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1428                         output_conv = w_oconv16; 
1429                         w_oconv16_LE = 1;
1430                         unicode_bom_f=1;
1431                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1432                         output_conv = w_oconv16; 
1433                         w_oconv16_LE = 1;
1434                         unicode_bom_f=2;
1435 #endif
1436                     }
1437                     continue;
1438                 }
1439 #ifdef OVERWRITE
1440                 if (strcmp(long_option[i].name, "overwrite") == 0){
1441                     file_out_f = TRUE;
1442                     overwrite_f = TRUE;
1443                     preserve_time_f = TRUE;
1444                     continue;
1445                 }
1446                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1447                     file_out_f = TRUE;
1448                     overwrite_f = TRUE;
1449                     preserve_time_f = TRUE;
1450                     backup_f = TRUE;
1451                     backup_suffix = malloc(strlen((char *) p) + 1);
1452                     strcpy(backup_suffix, (char *) p);
1453                     continue;
1454                 }
1455                 if (strcmp(long_option[i].name, "in-place") == 0){
1456                     file_out_f = TRUE;
1457                     overwrite_f = TRUE;
1458                     preserve_time_f = FALSE;
1459                     continue;
1460                 }
1461                 if (strcmp(long_option[i].name, "in-place=") == 0){
1462                     file_out_f = TRUE;
1463                     overwrite_f = TRUE;
1464                     preserve_time_f = FALSE;
1465                     backup_f = TRUE;
1466                     backup_suffix = malloc(strlen((char *) p) + 1);
1467                     strcpy(backup_suffix, (char *) p);
1468                     continue;
1469                 }
1470 #endif
1471 #ifdef INPUT_OPTION
1472                 if (strcmp(long_option[i].name, "cap-input") == 0){
1473                     cap_f = TRUE;
1474                     continue;
1475                 }
1476                 if (strcmp(long_option[i].name, "url-input") == 0){
1477                     url_f = TRUE;
1478                     continue;
1479                 }
1480 #endif
1481 #ifdef NUMCHAR_OPTION
1482                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1483                     numchar_f = TRUE;
1484                     continue;
1485                 }
1486 #endif
1487 #ifdef CHECK_OPTION
1488                 if (strcmp(long_option[i].name, "no-output") == 0){
1489                     noout_f = TRUE;
1490                     continue;
1491                 }
1492                 if (strcmp(long_option[i].name, "debug") == 0){
1493                     debug_f = TRUE;
1494                     continue;
1495                 }
1496 #endif
1497                 if (strcmp(long_option[i].name, "cp932") == 0){
1498 #ifdef SHIFTJIS_CP932
1499                     cp51932_f = TRUE;
1500                     cp932inv_f = TRUE;
1501 #endif
1502 #ifdef UTF8_OUTPUT_ENABLE
1503                     ms_ucs_map_f = UCS_MAP_CP932;
1504 #endif
1505                     continue;
1506                 }
1507                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1508 #ifdef SHIFTJIS_CP932
1509                     cp51932_f = FALSE;
1510                     cp932inv_f = FALSE;
1511 #endif
1512 #ifdef UTF8_OUTPUT_ENABLE
1513                     ms_ucs_map_f = UCS_MAP_ASCII;
1514 #endif
1515                     continue;
1516                 }
1517 #ifdef SHIFTJIS_CP932
1518                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1519                     cp932inv_f = TRUE;
1520                     continue;
1521                 }
1522 #endif
1523
1524 #ifdef X0212_ENABLE
1525                 if (strcmp(long_option[i].name, "x0212") == 0){
1526                     x0212_f = TRUE;
1527                     continue;
1528                 }
1529 #endif
1530
1531 #ifdef EXEC_IO
1532                   if (strcmp(long_option[i].name, "exec-in") == 0){
1533                       exec_f = 1;
1534                       return;
1535                   }
1536                   if (strcmp(long_option[i].name, "exec-out") == 0){
1537                       exec_f = -1;
1538                       return;
1539                   }
1540 #endif
1541 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1542                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1543                     no_cp932ext_f = TRUE;
1544                     continue;
1545                 }
1546                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1547                     no_best_fit_chars_f = TRUE;
1548                     continue;
1549                 }
1550                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1551                     encode_fallback = NULL;
1552                     continue;
1553                 }
1554                 if (strcmp(long_option[i].name, "fb-html") == 0){
1555                     encode_fallback = encode_fallback_html;
1556                     continue;
1557                 }
1558                 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1559                     encode_fallback = encode_fallback_xml;
1560                     continue;
1561                 }
1562                 if (strcmp(long_option[i].name, "fb-java") == 0){
1563                     encode_fallback = encode_fallback_java;
1564                     continue;
1565                 }
1566                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1567                     encode_fallback = encode_fallback_perl;
1568                     continue;
1569                 }
1570                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1571                     encode_fallback = encode_fallback_subchar;
1572                     continue;
1573                 }
1574                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1575                     encode_fallback = encode_fallback_subchar;
1576                     unicode_subchar = 0;
1577                     if (p[0] != '0'){
1578                         /* decimal number */
1579                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1580                             unicode_subchar *= 10;
1581                             unicode_subchar += hex2bin(p[i]);
1582                         }
1583                     }else if(p[1] == 'x' || p[1] == 'X'){
1584                         /* hexadecimal number */
1585                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1586                             unicode_subchar <<= 4;
1587                             unicode_subchar |= hex2bin(p[i]);
1588                         }
1589                     }else{
1590                         /* octal number */
1591                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1592                             unicode_subchar *= 8;
1593                             unicode_subchar += hex2bin(p[i]);
1594                         }
1595                     }
1596                     w16e_conv(unicode_subchar, &i, &j);
1597                     unicode_subchar = i<<8 | j;
1598                     continue;
1599                 }
1600 #endif
1601 #ifdef UTF8_OUTPUT_ENABLE
1602                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1603                     ms_ucs_map_f = UCS_MAP_MS;
1604                     continue;
1605                 }
1606 #endif
1607 #ifdef UNICODE_NORMALIZATION
1608                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1609                     input_f = UTF8_INPUT;
1610                     nfc_f = TRUE;
1611                     continue;
1612                 }
1613 #endif
1614                 if (strcmp(long_option[i].name, "prefix=") == 0){
1615                     if (nkf_isgraph(p[0])){
1616                         for (i = 1; nkf_isgraph(p[i]); i++){
1617                             prefix_table[p[i]] = p[0];
1618                         }
1619                     }
1620                     continue;
1621                 }
1622             }
1623             continue;
1624         case 'b':           /* buffered mode */
1625             unbuf_f = FALSE;
1626             continue;
1627         case 'u':           /* unbuffered mode */
1628             unbuf_f = TRUE;
1629             continue;
1630         case 't':           /* transparent mode */
1631             if (*cp=='1') {
1632                 /* alias of -t */
1633                 nop_f = TRUE;
1634                 *cp++;
1635             } else if (*cp=='2') {
1636                 /*
1637                  * -t with put/get
1638                  *
1639                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1640                  *
1641                  */
1642                 nop_f = 2;
1643                 *cp++;
1644             } else
1645                 nop_f = TRUE;
1646             continue;
1647         case 'j':           /* JIS output */
1648         case 'n':
1649             output_conv = j_oconv;
1650             continue;
1651         case 'e':           /* AT&T EUC output */
1652             output_conv = e_oconv;
1653             continue;
1654         case 's':           /* SJIS output */
1655             output_conv = s_oconv;
1656             continue;
1657         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1658             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1659             input_f = LATIN1_INPUT;
1660             continue;
1661         case 'i':           /* Kanji IN ESC-$-@/B */
1662             if (*cp=='@'||*cp=='B') 
1663                 kanji_intro = *cp++;
1664             continue;
1665         case 'o':           /* ASCII IN ESC-(-J/B */
1666             if (*cp=='J'||*cp=='B'||*cp=='H') 
1667                 ascii_intro = *cp++;
1668             continue;
1669         case 'h':
1670             /*  
1671                 bit:1   katakana->hiragana
1672                 bit:2   hiragana->katakana
1673             */
1674             if ('9'>= *cp && *cp>='0') 
1675                 hira_f |= (*cp++ -'0');
1676             else 
1677                 hira_f |= 1;
1678             continue;
1679         case 'r':
1680             rot_f = TRUE;
1681             continue;
1682 #if defined(MSDOS) || defined(__OS2__) 
1683         case 'T':
1684             binmode_f = FALSE;
1685             continue;
1686 #endif
1687 #ifndef PERL_XS
1688         case 'V':
1689             version();
1690             exit(1);
1691             break;
1692         case 'v':
1693             usage();
1694             exit(1);
1695             break;
1696 #endif
1697 #ifdef UTF8_OUTPUT_ENABLE
1698         case 'w':           /* UTF-8 output */
1699             if ('1'== cp[0] && '6'==cp[1]) {
1700                 output_conv = w_oconv16; cp+=2;
1701                 if (cp[0]=='L') {
1702                     unicode_bom_f=2; cp++;
1703                     w_oconv16_LE = 1;
1704                     if (cp[0] == '0'){
1705                         unicode_bom_f=1; cp++;
1706                     }
1707                 } else if (cp[0] == 'B') {
1708                     unicode_bom_f=2; cp++;
1709                     if (cp[0] == '0'){
1710                         unicode_bom_f=1; cp++;
1711                     }
1712                 } 
1713             } else if (cp[0] == '8') {
1714                 output_conv = w_oconv; cp++;
1715                 unicode_bom_f=2;
1716                 if (cp[0] == '0'){
1717                     unicode_bom_f=1; cp++;
1718                 }
1719             } else
1720                 output_conv = w_oconv;
1721             continue;
1722 #endif
1723 #ifdef UTF8_INPUT_ENABLE
1724         case 'W':           /* UTF-8 input */
1725             if ('1'== cp[0] && '6'==cp[1]) {
1726                 input_f = UTF16BE_INPUT;
1727                 utf16_mode = UTF16BE_INPUT;
1728                 cp += 2;
1729                 if (cp[0]=='L') {
1730                     cp++;
1731                     input_f = UTF16LE_INPUT;
1732                     utf16_mode = UTF16LE_INPUT;
1733                 } else if (cp[0] == 'B') {
1734                     cp++;
1735                     input_f = UTF16BE_INPUT;
1736                     utf16_mode = UTF16BE_INPUT;
1737                 }
1738             } else if (cp[0] == '8') {
1739                 cp++;
1740                 input_f = UTF8_INPUT;
1741             } else
1742                 input_f = UTF8_INPUT;
1743             continue;
1744 #endif
1745         /* Input code assumption */
1746         case 'J':   /* JIS input */
1747             input_f = JIS_INPUT;
1748             continue;
1749         case 'E':   /* AT&T EUC input */
1750             input_f = EUC_INPUT;
1751             continue;
1752         case 'S':   /* MS Kanji input */
1753             input_f = SJIS_INPUT;
1754             if (x0201_f==NO_X0201) x0201_f=TRUE;
1755             continue;
1756         case 'Z':   /* Convert X0208 alphabet to asii */
1757             /*  bit:0   Convert X0208
1758                 bit:1   Convert Kankaku to one space
1759                 bit:2   Convert Kankaku to two spaces
1760                 bit:3   Convert HTML Entity
1761             */
1762             if ('9'>= *cp && *cp>='0') 
1763                 alpha_f |= 1<<(*cp++ -'0');
1764             else 
1765                 alpha_f |= TRUE;
1766             continue;
1767         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1768             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1769             /* accept  X0201
1770                     ESC-(-I     in JIS, EUC, MS Kanji
1771                     SI/SO       in JIS, EUC, MS Kanji
1772                     SSO         in EUC, JIS, not in MS Kanji
1773                     MS Kanji (0xa0-0xdf) 
1774                output  X0201
1775                     ESC-(-I     in JIS (0x20-0x5f)
1776                     SSO         in EUC (0xa0-0xdf)
1777                     0xa0-0xd    in MS Kanji (0xa0-0xdf) 
1778             */
1779             continue;
1780         case 'X':   /* Assume X0201 kana */
1781             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1782             x0201_f = TRUE;
1783             continue;
1784         case 'F':   /* prserve new lines */
1785             fold_preserve_f = TRUE;
1786         case 'f':   /* folding -f60 or -f */
1787             fold_f = TRUE;
1788             fold_len = 0;
1789             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1790                 fold_len *= 10;
1791                 fold_len += *cp++ - '0';
1792             }
1793             if (!(0<fold_len && fold_len<BUFSIZ)) 
1794                 fold_len = DEFAULT_FOLD;
1795             if (*cp=='-') {
1796                 fold_margin = 0;
1797                 cp++;
1798                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1799                     fold_margin *= 10;
1800                     fold_margin += *cp++ - '0';
1801                 }
1802             }
1803             continue;
1804         case 'm':   /* MIME support */
1805             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1806             if (*cp=='B'||*cp=='Q') {
1807                 mime_decode_mode = *cp++;
1808                 mimebuf_f = FIXED_MIME;
1809             } else if (*cp=='N') {
1810                 mime_f = TRUE; cp++;
1811             } else if (*cp=='S') {
1812                 mime_f = STRICT_MIME; cp++;
1813             } else if (*cp=='0') {
1814                 mime_decode_f = FALSE;
1815                 mime_f = FALSE; cp++;
1816             }
1817             continue;
1818         case 'M':   /* MIME output */
1819             if (*cp=='B') {
1820                 mimeout_mode = 'B';
1821                 mimeout_f = FIXED_MIME; cp++;
1822             } else if (*cp=='Q') {
1823                 mimeout_mode = 'Q';
1824                 mimeout_f = FIXED_MIME; cp++;
1825             } else {
1826                 mimeout_f = TRUE;
1827             }
1828             continue;
1829         case 'B':   /* Broken JIS support */
1830             /*  bit:0   no ESC JIS
1831                 bit:1   allow any x on ESC-(-x or ESC-$-x
1832                 bit:2   reset to ascii on NL
1833             */
1834             if ('9'>= *cp && *cp>='0') 
1835                 broken_f |= 1<<(*cp++ -'0');
1836             else 
1837                 broken_f |= TRUE;
1838             continue;
1839 #ifndef PERL_XS
1840         case 'O':/* for Output file */
1841             file_out_f = TRUE;
1842             continue;
1843 #endif
1844         case 'c':/* add cr code */
1845             crmode_f = CRLF;
1846             continue;
1847         case 'd':/* delete cr code */
1848             crmode_f = NL;
1849             continue;
1850         case 'I':   /* ISO-2022-JP output */
1851             iso2022jp_f = TRUE;
1852             continue;
1853         case 'L':  /* line mode */
1854             if (*cp=='u') {         /* unix */
1855                 crmode_f = NL; cp++;
1856             } else if (*cp=='m') { /* mac */
1857                 crmode_f = CR; cp++;
1858             } else if (*cp=='w') { /* windows */
1859                 crmode_f = CRLF; cp++;
1860             } else if (*cp=='0') { /* no conversion  */
1861                 crmode_f = 0; cp++;
1862             }
1863             continue;
1864         case 'g':
1865 #ifndef PERL_XS
1866             guess_f = TRUE;
1867 #endif
1868             continue;
1869         case ' ':    
1870         /* module muliple options in a string are allowed for Perl moudle  */
1871             while(*cp && *cp++!='-');
1872             continue;
1873         default:
1874             /* bogus option but ignored */
1875             continue;
1876         }
1877     }
1878 }
1879
1880 #ifdef ANSI_C_PROTOTYPE
1881 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1882 #else
1883 struct input_code * find_inputcode_byfunc(iconv_func)
1884      nkf_char (*iconv_func)();
1885 #endif
1886 {
1887     if (iconv_func){
1888         struct input_code *p = input_code_list;
1889         while (p->name){
1890             if (iconv_func == p->iconv_func){
1891                 return p;
1892             }
1893             p++;
1894         }
1895     }
1896     return 0;
1897 }
1898
1899 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1900 {
1901 #ifdef INPUT_CODE_FIX
1902     if (f || !input_f)
1903 #endif
1904         if (estab_f != f){
1905             estab_f = f;
1906         }
1907
1908     if (iconv_func
1909 #ifdef INPUT_CODE_FIX
1910         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1911 #endif
1912         ){
1913         iconv = iconv_func;
1914     }
1915 #ifdef CHECK_OPTION
1916     if (estab_f && iconv_for_check != iconv){
1917         struct input_code *p = find_inputcode_byfunc(iconv);
1918         if (p){
1919             set_input_codename(p->name);
1920             debug(input_codename);
1921         }
1922         iconv_for_check = iconv;
1923     }
1924 #endif
1925 }
1926
/*
 * Bit flags OR-ed into input_code.score by set_code_score().
 * Each flag occupies its own bit; SCORE_CP932 only exists when
 * SHIFTJIS_CP932 is defined, which shifts the later flags up by one.
 */
#define SCORE_L2       (1 << 0)              /* JIS level-2 kanji */
#define SCORE_KANA     (1 << 1)              /* so-called half-width kana */
#define SCORE_DEPEND   (1 << 2)              /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (1 << 3)              /* character remapped via CP932 */
#define SCORE_NO_EXIST (1 << 4)              /* non-existent character */
#else
#define SCORE_NO_EXIST (1 << 3)              /* non-existent character */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score every candidate starts with (see status_reset). */
#define SCORE_INIT (SCORE_iMIME)
1940
1941 const nkf_char score_table_A0[] = {
1942     0, 0, 0, 0,
1943     0, 0, 0, 0,
1944     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1945     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1946 };
1947
1948 const nkf_char score_table_F0[] = {
1949     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1950     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1951     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1952     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1953 };
1954
1955 void set_code_score(struct input_code *ptr, nkf_char score)
1956 {
1957     if (ptr){
1958         ptr->score |= score;
1959     }
1960 }
1961
1962 void clr_code_score(struct input_code *ptr, nkf_char score)
1963 {
1964     if (ptr){
1965         ptr->score &= ~score;
1966     }
1967 }
1968
1969 void code_score(struct input_code *ptr)
1970 {
1971     nkf_char c2 = ptr->buf[0];
1972 #ifdef UTF8_OUTPUT_ENABLE
1973     nkf_char c1 = ptr->buf[1];
1974 #endif
1975     if (c2 < 0){
1976         set_code_score(ptr, SCORE_ERROR);
1977     }else if (c2 == SSO){
1978         set_code_score(ptr, SCORE_KANA);
1979 #ifdef UTF8_OUTPUT_ENABLE
1980     }else if (!e2w_conv(c2, c1)){
1981         set_code_score(ptr, SCORE_NO_EXIST);
1982 #endif
1983     }else if ((c2 & 0x70) == 0x20){
1984         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1985     }else if ((c2 & 0x70) == 0x70){
1986         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1987     }else if ((c2 & 0x70) >= 0x50){
1988         set_code_score(ptr, SCORE_L2);
1989     }
1990 }
1991
1992 void status_disable(struct input_code *ptr)
1993 {
1994     ptr->stat = -1;
1995     ptr->buf[0] = -1;
1996     code_score(ptr);
1997     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1998 }
1999
2000 void status_push_ch(struct input_code *ptr, nkf_char c)
2001 {
2002     ptr->buf[ptr->index++] = c;
2003 }
2004
2005 void status_clear(struct input_code *ptr)
2006 {
2007     ptr->stat = 0;
2008     ptr->index = 0;
2009 }
2010
2011 void status_reset(struct input_code *ptr)
2012 {
2013     status_clear(ptr);
2014     ptr->score = SCORE_INIT;
2015 }
2016
2017 void status_reinit(struct input_code *ptr)
2018 {
2019     status_reset(ptr);
2020     ptr->_file_stat = 0;
2021 }
2022
2023 void status_check(struct input_code *ptr, nkf_char c)
2024 {
2025     if (c <= DEL && estab_f){
2026         status_reset(ptr);
2027     }
2028 }
2029
2030 void s_status(struct input_code *ptr, nkf_char c)
2031 {
2032     switch(ptr->stat){
2033       case -1:
2034           status_check(ptr, c);
2035           break;
2036       case 0:
2037           if (c <= DEL){
2038               break;
2039 #ifdef NUMCHAR_OPTION
2040           }else if (is_unicode_capsule(c)){
2041               break;
2042 #endif
2043           }else if (0xa1 <= c && c <= 0xdf){
2044               status_push_ch(ptr, SSO);
2045               status_push_ch(ptr, c);
2046               code_score(ptr);
2047               status_clear(ptr);
2048           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2049               ptr->stat = 1;
2050               status_push_ch(ptr, c);
2051 #ifdef SHIFTJIS_CP932
2052           }else if (cp51932_f
2053                     && is_ibmext_in_sjis(c)){
2054               ptr->stat = 2;
2055               status_push_ch(ptr, c);
2056 #endif /* SHIFTJIS_CP932 */
2057 #ifdef X0212_ENABLE
2058           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2059               ptr->stat = 1;
2060               status_push_ch(ptr, c);
2061 #endif /* X0212_ENABLE */
2062           }else{
2063               status_disable(ptr);
2064           }
2065           break;
2066       case 1:
2067           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2068               status_push_ch(ptr, c);
2069               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2070               code_score(ptr);
2071               status_clear(ptr);
2072           }else{
2073               status_disable(ptr);
2074           }
2075           break;
2076       case 2:
2077 #ifdef SHIFTJIS_CP932
2078           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2079               status_push_ch(ptr, c);
2080               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2081                   set_code_score(ptr, SCORE_CP932);
2082                   status_clear(ptr);
2083                   break;
2084               }
2085           }
2086 #endif /* SHIFTJIS_CP932 */
2087 #ifndef X0212_ENABLE
2088           status_disable(ptr);
2089 #endif
2090           break;
2091     }
2092 }
2093
2094 void e_status(struct input_code *ptr, nkf_char c)
2095 {
2096     switch (ptr->stat){
2097       case -1:
2098           status_check(ptr, c);
2099           break;
2100       case 0:
2101           if (c <= DEL){
2102               break;
2103 #ifdef NUMCHAR_OPTION
2104           }else if (is_unicode_capsule(c)){
2105               break;
2106 #endif
2107           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2108               ptr->stat = 1;
2109               status_push_ch(ptr, c);
2110 #ifdef X0212_ENABLE
2111           }else if (0x8f == c){
2112               ptr->stat = 2;
2113               status_push_ch(ptr, c);
2114 #endif /* X0212_ENABLE */
2115           }else{
2116               status_disable(ptr);
2117           }
2118           break;
2119       case 1:
2120           if (0xa1 <= c && c <= 0xfe){
2121               status_push_ch(ptr, c);
2122               code_score(ptr);
2123               status_clear(ptr);
2124           }else{
2125               status_disable(ptr);
2126           }
2127           break;
2128 #ifdef X0212_ENABLE
2129       case 2:
2130           if (0xa1 <= c && c <= 0xfe){
2131               ptr->stat = 1;
2132               status_push_ch(ptr, c);
2133           }else{
2134               status_disable(ptr);
2135           }
2136 #endif /* X0212_ENABLE */
2137     }
2138 }
2139
2140 #ifdef UTF8_INPUT_ENABLE
2141 void w16_status(struct input_code *ptr, nkf_char c)
2142 {
2143     switch (ptr->stat){
2144       case -1:
2145           break;
2146       case 0:
2147           if (ptr->_file_stat == 0){
2148               if (c == 0xfe || c == 0xff){
2149                   ptr->stat = c;
2150                   status_push_ch(ptr, c);
2151                   ptr->_file_stat = 1;
2152               }else{
2153                   status_disable(ptr);
2154                   ptr->_file_stat = -1;
2155               }
2156           }else if (ptr->_file_stat > 0){
2157               ptr->stat = 1;
2158               status_push_ch(ptr, c);
2159           }else if (ptr->_file_stat < 0){
2160               status_disable(ptr);
2161           }
2162           break;
2163
2164       case 1:
2165           if (c == EOF){
2166               status_disable(ptr);
2167               ptr->_file_stat = -1;
2168           }else{
2169               status_push_ch(ptr, c);
2170               status_clear(ptr);
2171           }
2172           break;
2173
2174       case 0xfe:
2175       case 0xff:
2176           if (ptr->stat != c && (c == 0xfe || c == 0xff)){
2177               status_push_ch(ptr, c);
2178               status_clear(ptr);
2179           }else{
2180               status_disable(ptr);
2181               ptr->_file_stat = -1;
2182           }
2183           break;
2184     }
2185 }
2186
2187 void w_status(struct input_code *ptr, nkf_char c)
2188 {
2189     switch (ptr->stat){
2190       case -1:
2191           status_check(ptr, c);
2192           break;
2193       case 0:
2194           if (c <= DEL){
2195               break;
2196 #ifdef NUMCHAR_OPTION
2197           }else if (is_unicode_capsule(c)){
2198               break;
2199 #endif
2200           }else if (0xc0 <= c && c <= 0xdf){
2201               ptr->stat = 1;
2202               status_push_ch(ptr, c);
2203           }else if (0xe0 <= c && c <= 0xef){
2204               ptr->stat = 2;
2205               status_push_ch(ptr, c);
2206           }else{
2207               status_disable(ptr);
2208           }
2209           break;
2210       case 1:
2211       case 2:
2212           if (0x80 <= c && c <= 0xbf){
2213               status_push_ch(ptr, c);
2214               if (ptr->index > ptr->stat){
2215                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2216                              && ptr->buf[2] == 0xbf);
2217                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2218                            &ptr->buf[0], &ptr->buf[1]);
2219                   if (!bom){
2220                       code_score(ptr);
2221                   }
2222                   status_clear(ptr);
2223               }
2224           }else{
2225               status_disable(ptr);
2226           }
2227           break;
2228     }
2229 }
2230 #endif
2231
2232 void code_status(nkf_char c)
2233 {
2234     int action_flag = 1;
2235     struct input_code *result = 0;
2236     struct input_code *p = input_code_list;
2237     while (p->name){
2238         (p->status_func)(p, c);
2239         if (p->stat > 0){
2240             action_flag = 0;
2241         }else if(p->stat == 0){
2242             if (result){
2243                 action_flag = 0;
2244             }else{
2245                 result = p;
2246             }
2247         }
2248         ++p;
2249     }
2250
2251     if (action_flag){
2252         if (result && !estab_f){
2253             set_iconv(TRUE, result->iconv_func);
2254         }else if (c <= DEL){
2255             struct input_code *ptr = input_code_list;
2256             while (ptr->name){
2257                 status_reset(ptr);
2258                 ++ptr;
2259             }
2260         }
2261     }
2262 }
2263
2264 #ifndef WIN32DLL
2265 nkf_char std_getc(FILE *f)
2266 {
2267     if (std_gc_ndx){
2268         return std_gc_buf[--std_gc_ndx];
2269     }
2270     return getc(f);
2271 }
2272 #endif /*WIN32DLL*/
2273
2274 nkf_char std_ungetc(nkf_char c, FILE *f)
2275 {
2276     if (std_gc_ndx == STD_GC_BUFSIZE){
2277         return EOF;
2278     }
2279     std_gc_buf[std_gc_ndx++] = c;
2280     return c;
2281 }
2282
2283 #ifndef WIN32DLL
2284 void std_putc(nkf_char c)
2285 {
2286     if(c!=EOF)
2287       putchar(c);
2288 }
2289 #endif /*WIN32DLL*/
2290
2291 #if !defined(PERL_XS) && !defined(WIN32DLL)
2292 nkf_char noconvert(FILE *f)
2293 {
2294     nkf_char    c;
2295
2296     if (nop_f == 2)
2297         module_connection();
2298     while ((c = (*i_getc)(f)) != EOF)
2299       (*o_putc)(c);
2300     (*o_putc)(EOF);
2301     return 1;
2302 }
2303 #endif
2304
2305 void module_connection(void)
2306 {
2307     oconv = output_conv; 
2308     o_putc = std_putc;
2309
2310     /* replace continucation module, from output side */
2311
2312     /* output redicrection */
2313 #ifdef CHECK_OPTION
2314     if (noout_f || guess_f){
2315         o_putc = no_putc;
2316     }
2317 #endif
2318     if (mimeout_f) {
2319         o_mputc = o_putc;
2320         o_putc = mime_putc;
2321         if (mimeout_f == TRUE) {
2322             o_base64conv = oconv; oconv = base64_conv;
2323         }
2324         /* base64_count = 0; */
2325     }
2326
2327     if (crmode_f) {
2328         o_crconv = oconv; oconv = cr_conv;
2329     }
2330     if (rot_f) {
2331         o_rot_conv = oconv; oconv = rot_conv;
2332     }
2333     if (iso2022jp_f) {
2334         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2335     }
2336     if (hira_f) {
2337         o_hira_conv = oconv; oconv = hira_conv;
2338     }
2339     if (fold_f) {
2340         o_fconv = oconv; oconv = fold_conv;
2341         f_line = 0;
2342     }
2343     if (alpha_f || x0201_f) {
2344         o_zconv = oconv; oconv = z_conv;
2345     }
2346
2347     i_getc = std_getc;
2348     i_ungetc = std_ungetc;
2349     /* input redicrection */
2350 #ifdef INPUT_OPTION
2351     if (cap_f){
2352         i_cgetc = i_getc; i_getc = cap_getc;
2353         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2354     }
2355     if (url_f){
2356         i_ugetc = i_getc; i_getc = url_getc;
2357         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2358     }
2359 #endif
2360 #ifdef NUMCHAR_OPTION
2361     if (numchar_f){
2362         i_ngetc = i_getc; i_getc = numchar_getc;
2363         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2364     }
2365 #endif
2366 #ifdef UNICODE_NORMALIZATION
2367     if (nfc_f && input_f == UTF8_INPUT){
2368         i_nfc_getc = i_getc; i_getc = nfc_getc;
2369         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2370     }
2371 #endif
2372     if (mime_f && mimebuf_f==FIXED_MIME) {
2373         i_mgetc = i_getc; i_getc = mime_getc;
2374         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2375     }
2376     if (broken_f & 1) {
2377         i_bgetc = i_getc; i_getc = broken_getc;
2378         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2379     }
2380     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2381         set_iconv(-TRUE, e_iconv);
2382     } else if (input_f == SJIS_INPUT) {
2383         set_iconv(-TRUE, s_iconv);
2384 #ifdef UTF8_INPUT_ENABLE
2385     } else if (input_f == UTF8_INPUT) {
2386         set_iconv(-TRUE, w_iconv);
2387     } else if (input_f == UTF16BE_INPUT) {
2388         set_iconv(-TRUE, w_iconv16);
2389     } else if (input_f == UTF16LE_INPUT) {
2390         set_iconv(-TRUE, w_iconv16);
2391 #endif
2392     } else {
2393         set_iconv(FALSE, e_iconv);
2394     }
2395
2396     {
2397         struct input_code *p = input_code_list;
2398         while (p->name){
2399             status_reinit(p++);
2400         }
2401     }
2402 }
2403
2404 /*
2405    Conversion main loop. Code detection only. 
2406  */
2407
2408 nkf_char kanji_convert(FILE *f)
2409 {
2410     nkf_char    c1,
2411                     c2, c3;
2412     int is_8bit = FALSE;
2413
2414     module_connection();
2415     c2 = 0;
2416
2417     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2418 #ifdef UTF8_INPUT_ENABLE
2419        || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT || input_f == UTF16LE_INPUT
2420 #endif
2421       ){
2422         is_8bit = TRUE;
2423     }
2424
2425
2426     input_mode = ASCII;
2427     output_mode = ASCII;
2428     shift_mode = FALSE;
2429
2430 #define NEXT continue      /* no output, get next */
2431 #define SEND ;             /* output c1 and c2, get next */
2432 #define LAST break         /* end of loop, go closing  */
2433
2434     while ((c1 = (*i_getc)(f)) != EOF) {
2435 #ifdef INPUT_CODE_FIX
2436         if (!input_f)
2437 #endif
2438             code_status(c1);
2439         if (c2) {
2440             /* second byte */
2441             if (c2 > DEL) {
2442                 /* in case of 8th bit is on */
2443                 if (!estab_f&&!mime_decode_mode) {
2444                     /* in case of not established yet */
2445                     /* It is still ambiguious */
2446                     if (h_conv(f, c2, c1)==EOF) 
2447                         LAST;
2448                     else 
2449                         c2 = 0;
2450                     NEXT;
2451                 } else
2452                     /* in case of already established */
2453                     if (c1 < AT) {
2454                         /* ignore bogus code */
2455                         c2 = 0;
2456                         NEXT;
2457                     } else
2458                         SEND;
2459             } else
2460                 /* second byte, 7 bit code */
2461                 /* it might be kanji shitfted */
2462                 if ((c1 == DEL) || (c1 <= SPACE)) {
2463                     /* ignore bogus first code */
2464                     c2 = 0;
2465                     NEXT;
2466                 } else
2467                     SEND;
2468         } else {
2469             /* first byte */
2470             if (
2471 #ifdef UTF8_INPUT_ENABLE
2472                 iconv == w_iconv16
2473 #else
2474                 0
2475 #endif
2476                 ) {
2477                 c2 = c1;
2478                 c1 = (*i_getc)(f);
2479                 SEND;
2480 #ifdef NUMCHAR_OPTION
2481             } else if (is_unicode_capsule(c1)){
2482                 SEND;
2483 #endif
2484             } else if (c1 > DEL) {
2485                 /* 8 bit code */
2486                 if (!estab_f && !iso8859_f) {
2487                     /* not established yet */
2488                     c2 = c1;
2489                     NEXT;
2490                 } else { /* estab_f==TRUE */
2491                     if (iso8859_f) {
2492                         c2 = ISO8859_1;
2493                         c1 &= 0x7f;
2494                         SEND;
2495                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2496                         /* SJIS X0201 Case... */
2497                         if(iso2022jp_f && x0201_f==NO_X0201) {
2498                             (*oconv)(GETA1, GETA2);
2499                             NEXT;
2500                         } else {
2501                             c2 = X0201;
2502                             c1 &= 0x7f;
2503                             SEND;
2504                         }
2505                     } else if (c1==SSO && iconv != s_iconv) {
2506                         /* EUC X0201 Case */
2507                         c1 = (*i_getc)(f);  /* skip SSO */
2508                         code_status(c1);
2509                         if (SSP<=c1 && c1<0xe0) {
2510                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2511                                 (*oconv)(GETA1, GETA2);
2512                                 NEXT;
2513                             } else {
2514                                 c2 = X0201;
2515                                 c1 &= 0x7f;
2516                                 SEND;
2517                             }
2518                         } else  { /* bogus code, skip SSO and one byte */
2519                             NEXT;
2520                         }
2521                     } else {
2522                        /* already established */
2523                        c2 = c1;
2524                        NEXT;
2525                     }
2526                 }
2527             } else if ((c1 > SPACE) && (c1 != DEL)) {
2528                 /* in case of Roman characters */
2529                 if (shift_mode) { 
2530                     /* output 1 shifted byte */
2531                     if (iso8859_f) {
2532                         c2 = ISO8859_1;
2533                         SEND;
2534                     } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2535                       /* output 1 shifted byte */
2536                         if(iso2022jp_f && x0201_f==NO_X0201) {
2537                             (*oconv)(GETA1, GETA2);
2538                             NEXT;
2539                         } else {
2540                             c2 = X0201;
2541                             SEND;
2542                         }
2543                     } else {
2544                         /* look like bogus code */
2545                         NEXT;
2546                     }
2547                 } else if (input_mode == X0208 || input_mode == X0212 ||
2548                            input_mode == X0213_1 || input_mode == X0213_2) {
2549                     /* in case of Kanji shifted */
2550                     c2 = c1;
2551                     NEXT;
2552                 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2553                     /* Check MIME code */
2554                     if ((c1 = (*i_getc)(f)) == EOF) {
2555                         (*oconv)(0, '=');
2556                         LAST;
2557                     } else if (c1 == '?') {
2558                         /* =? is mime conversion start sequence */
2559                         if(mime_f == STRICT_MIME) {
2560                             /* check in real detail */
2561                             if (mime_begin_strict(f) == EOF) 
2562                                 LAST;
2563                             else
2564                                 NEXT;
2565                         } else if (mime_begin(f) == EOF) 
2566                             LAST;
2567                         else
2568                             NEXT;
2569                     } else {
2570                         (*oconv)(0, '=');
2571                         (*i_ungetc)(c1,f);
2572                         NEXT;
2573                     }
2574                 } else {
2575                     /* normal ASCII code */ 
2576                     SEND;
2577                 }
2578             } else if (!is_8bit && c1 == SI) {
2579                 shift_mode = FALSE; 
2580                 NEXT;
2581             } else if (!is_8bit && c1 == SO) {
2582                 shift_mode = TRUE; 
2583                 NEXT;
2584             } else if (!is_8bit && c1 == ESC ) {
2585                 if ((c1 = (*i_getc)(f)) == EOF) {
2586                     /*  (*oconv)(0, ESC); don't send bogus code */
2587                     LAST;
2588                 } else if (c1 == '$') {
2589                     if ((c1 = (*i_getc)(f)) == EOF) {
2590                         /*
2591                         (*oconv)(0, ESC); don't send bogus code 
2592                         (*oconv)(0, '$'); */
2593                         LAST;
2594                     } else if (c1 == '@'|| c1 == 'B') {
2595                         /* This is kanji introduction */
2596                         input_mode = X0208;
2597                         shift_mode = FALSE;
2598                         set_input_codename("ISO-2022-JP");
2599 #ifdef CHECK_OPTION
2600                         debug(input_codename);
2601 #endif
2602                         NEXT;
2603                     } else if (c1 == '(') {
2604                         if ((c1 = (*i_getc)(f)) == EOF) {
2605                             /* don't send bogus code 
2606                             (*oconv)(0, ESC);
2607                             (*oconv)(0, '$');
2608                             (*oconv)(0, '(');
2609                                 */
2610                             LAST;
2611                         } else if (c1 == '@'|| c1 == 'B') {
2612                             /* This is kanji introduction */
2613                             input_mode = X0208;
2614                             shift_mode = FALSE;
2615                             NEXT;
2616 #ifdef X0212_ENABLE
2617                         } else if (c1 == 'D'){
2618                             input_mode = X0212;
2619                             shift_mode = FALSE;
2620                             NEXT;
2621 #endif /* X0212_ENABLE */
2622                         } else if (c1 == (X0213_1&0x7F)){
2623                             input_mode = X0213_1;
2624                             shift_mode = FALSE;
2625                             NEXT;
2626                         } else if (c1 == (X0213_2&0x7F)){
2627                             input_mode = X0213_2;
2628                             shift_mode = FALSE;
2629                             NEXT;
2630                         } else {
2631                             /* could be some special code */
2632                             (*oconv)(0, ESC);
2633                             (*oconv)(0, '$');
2634                             (*oconv)(0, '(');
2635                             (*oconv)(0, c1);
2636                             NEXT;
2637                         }
2638                     } else if (broken_f&0x2) {
2639                         /* accept any ESC-(-x as broken code ... */
2640                         input_mode = X0208;
2641                         shift_mode = FALSE;
2642                         NEXT;
2643                     } else {
2644                         (*oconv)(0, ESC);
2645                         (*oconv)(0, '$');
2646                         (*oconv)(0, c1);
2647                         NEXT;
2648                     }
2649                 } else if (c1 == '(') {
2650                     if ((c1 = (*i_getc)(f)) == EOF) {
2651                         /* don't send bogus code 
2652                         (*oconv)(0, ESC);
2653                         (*oconv)(0, '('); */
2654                         LAST;
2655                     } else {
2656                         if (c1 == 'I') {
2657                             /* This is X0201 kana introduction */
2658                             input_mode = X0201; shift_mode = X0201;
2659                             NEXT;
2660                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2661                             /* This is X0208 kanji introduction */
2662                             input_mode = ASCII; shift_mode = FALSE;
2663                             NEXT;
2664                         } else if (broken_f&0x2) {
2665                             input_mode = ASCII; shift_mode = FALSE;
2666                             NEXT;
2667                         } else {
2668                             (*oconv)(0, ESC);
2669                             (*oconv)(0, '(');
2670                             /* maintain various input_mode here */
2671                             SEND;
2672                         }
2673                     }
2674                } else if ( c1 == 'N' || c1 == 'n' ){
2675                    /* SS2 */
2676                    c3 = (*i_getc)(f);  /* skip SS2 */
2677                    if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2678                        c1 = c3;
2679                        c2 = X0201;
2680                        SEND;
2681                    }else{
2682                        (*i_ungetc)(c3, f);
2683                        /* lonely ESC  */
2684                        (*oconv)(0, ESC);
2685                        SEND;
2686                    }
2687                 } else {
2688                     /* lonely ESC  */
2689                     (*oconv)(0, ESC);
2690                     SEND;
2691                 }
2692             } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2693                 input_mode = ASCII; set_iconv(FALSE, 0);
2694                 SEND;
2695             } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2696                 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2697                     i_ungetc(SPACE,f);
2698                     continue;
2699                 } else {
2700                     i_ungetc(c1,f);
2701                 }
2702                 c1 = NL;
2703                 SEND;
2704             } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2705                 if ((c1=(*i_getc)(f))!=EOF) {
2706                     if (c1==SPACE) {
2707                         i_ungetc(SPACE,f);
2708                         continue;
2709                     } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2710                         i_ungetc(SPACE,f);
2711                         continue;
2712                     } else {
2713                         i_ungetc(c1,f);
2714                     }
2715                     i_ungetc(NL,f);
2716                 } else {
2717                     i_ungetc(c1,f);
2718                 }
2719                 c1 = CR;
2720                 SEND;
2721             } else 
2722                 SEND;
2723         }
2724         /* send: */
2725         switch(input_mode){
2726         case ASCII:
2727             if ((*iconv)(c2, c1, 0) < 0){  /* can be EUC/SJIS */
2728                 nkf_char c0 = (*i_getc)(f);
2729                 if (c0 != EOF){
2730                     code_status(c0);
2731                     (*iconv)(c2, c1, c0);
2732                 }
2733             }
2734             break;
2735         case X0208:
2736         case X0213_1:
2737             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2738             break;
2739 #ifdef X0212_ENABLE
2740         case X0212:
2741             (*oconv)(PREFIX_EUCG3 | c2, c1);
2742             break;
2743 #endif /* X0212_ENABLE */
2744         case X0213_2:
2745             (*oconv)(PREFIX_EUCG3 | c2, c1);
2746             break;
2747         default:
2748             (*oconv)(input_mode, c1);  /* other special case */
2749         }
2750
2751         c2 = 0;
2752         continue;
2753         /* goto next_word */
2754     }
2755
2756     /* epilogue */
2757     (*iconv)(EOF, 0, 0);
2758     if (!is_inputcode_set)
2759     {
2760         if (is_8bit) {
2761             struct input_code *p = input_code_list;
2762             struct input_code *result = p;
2763             while (p->name){
2764                 if (p->score < result->score) result = p;
2765                 ++p;
2766             }
2767             set_input_codename(result->name);
2768         }
2769     }
2770     return 1;
2771 }
2772
2773 nkf_char
2774 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2775 {
2776     nkf_char    wc,c3;
2777
2778
2779     /** it must NOT be in the kanji shifte sequence      */
2780     /** it must NOT be written in JIS7                   */
2781     /** and it must be after 2 byte 8bit code            */
2782
2783     hold_count = 0;
2784     push_hold_buf(c2);
2785     push_hold_buf(c1);
2786
2787     while ((c1 = (*i_getc)(f)) != EOF) {
2788         if (c1 == ESC){
2789             (*i_ungetc)(c1,f);
2790             break;
2791         }
2792         code_status(c1);
2793         if (push_hold_buf(c1) == EOF || estab_f){
2794             break;
2795         }
2796     }
2797
2798     if (!estab_f){
2799         struct input_code *p = input_code_list;
2800         struct input_code *result = p;
2801         if (c1 == EOF){
2802             code_status(c1);
2803         }
2804         while (p->name){
2805             if (p->score < result->score){
2806                 result = p;
2807             }
2808             ++p;
2809         }
2810         set_iconv(FALSE, result->iconv_func);
2811     }
2812
2813
2814     /** now,
2815      ** 1) EOF is detected, or
2816      ** 2) Code is established, or
2817      ** 3) Buffer is FULL (but last word is pushed)
2818      **
2819      ** in 1) and 3) cases, we continue to use
2820      ** Kanji codes by oconv and leave estab_f unchanged.
2821      **/
2822
2823     c3=c1;
2824     wc = 0;
2825     while (wc < hold_count){
2826         c2 = hold_buf[wc++];
2827         if (c2 <= DEL
2828 #ifdef NUMCHAR_OPTION
2829             || is_unicode_capsule(c2)
2830 #endif
2831             ){
2832             (*iconv)(0, c2, 0);
2833             continue;
2834         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2835             (*iconv)(X0201, c2, 0);
2836             continue;
2837         }
2838         if (wc < hold_count){
2839             c1 = hold_buf[wc++];
2840         }else{
2841             c1 = (*i_getc)(f);
2842             if (c1 == EOF){
2843                 c3 = EOF;
2844                 break;
2845             }
2846             code_status(c1);
2847         }
2848         if ((*iconv)(c2, c1, 0) < 0){
2849             nkf_char c0;
2850             if (wc < hold_count){
2851                 c0 = hold_buf[wc++];
2852             }else{
2853                 c0 = (*i_getc)(f);
2854                 if (c0 == EOF){
2855                     c3 = EOF;
2856                     break;
2857                 }
2858                 code_status(c0);
2859             }
2860             (*iconv)(c2, c1, c0);
2861         }
2862     }
2863     return c3;
2864 }
2865
2866
2867
2868 nkf_char
2869 push_hold_buf(nkf_char c2)
2870 {
2871     if (hold_count >= HOLD_SIZE*2)
2872         return (EOF);
2873     hold_buf[hold_count++] = (unsigned char)c2;
2874     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2875 }
2876
2877 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
2878 {
2879 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2880     nkf_char val;
2881 #endif
2882     static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
2883 #ifdef SHIFTJIS_CP932
2884     if (cp51932_f && is_ibmext_in_sjis(c2)){
2885 #if 0
2886         extern const unsigned short shiftjis_cp932[3][189];
2887 #endif
2888         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2889         if (val){
2890             c2 = val >> 8;
2891             c1 = val & 0xff;
2892         }
2893     }
2894 #endif /* SHIFTJIS_CP932 */
2895 #ifdef X0212_ENABLE
2896     if (!x0213_f && is_ibmext_in_sjis(c2)){
2897 #if 0
2898         extern const unsigned short shiftjis_x0212[3][189];
2899 #endif
2900         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2901         if (val){
2902             if (val > 0x7FFF){
2903                 c2 = PREFIX_EUCG3 | (val >> 8);
2904                 c1 = val & 0xff;
2905             }else{
2906                 c2 = val >> 8;
2907                 c1 = val & 0xff;
2908             }
2909             if (p2) *p2 = c2;
2910             if (p1) *p1 = c1;
2911             return 0;
2912         }
2913     }
2914 #endif
2915     if(c2 >= 0x80){
2916         if(x0213_f && c2 >= 0xF0){
2917             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
2918                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
2919             }else{ /* 78<=k<=94 */
2920                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
2921                 if (0x9E < c1) c2++;
2922             }
2923         }else{
2924             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
2925             if (0x9E < c1) c2++;
2926         }
2927         if (c1 < 0x9F)
2928             c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
2929         else {
2930             c1 = c1 - 0x7E;
2931         }
2932     }
2933
2934 #ifdef X0212_ENABLE
2935     c2 = x0212_unshift(c2);
2936 #endif
2937     if (p2) *p2 = c2;
2938     if (p1) *p1 = c1;
2939     return 0;
2940 }
2941
2942 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2943 {
2944     if (c2 == X0201) {
2945         c1 &= 0x7f;
2946     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2947         /* NOP */
2948     } else {
2949         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2950         if (ret) return ret;
2951     }
2952     (*oconv)(c2, c1);
2953     return 0;
2954 }
2955
2956 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2957 {
2958     if (c2 == X0201) {
2959         c1 &= 0x7f;
2960 #ifdef X0212_ENABLE
2961     }else if (c2 == 0x8f){
2962         if (c0 == 0){
2963             return -1;
2964         }
2965         c2 = (c2 << 8) | (c1 & 0x7f);
2966         c1 = c0 & 0x7f;
2967 #ifdef SHIFTJIS_CP932
2968         if (cp51932_f){
2969             nkf_char s2, s1;
2970             if (e2s_conv(c2, c1, &s2, &s1) == 0){
2971                 s2e_conv(s2, s1, &c2, &c1);
2972                 if (c2 < 0x100){
2973                     c1 &= 0x7f;
2974                     c2 &= 0x7f;
2975                 }
2976             }
2977         }
2978 #endif /* SHIFTJIS_CP932 */
2979 #endif /* X0212_ENABLE */
2980     } else if (c2 == SSO){
2981         c2 = X0201;
2982         c1 &= 0x7f;
2983     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2984         /* NOP */
2985     } else {
2986         c1 &= 0x7f;
2987         c2 &= 0x7f;
2988     }
2989     (*oconv)(c2, c1);
2990     return 0;
2991 }
2992
2993 #ifdef UTF8_INPUT_ENABLE
2994 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2995 {
2996     nkf_char ret = 0;
2997
2998     if (!c1){
2999         *p2 = 0;
3000         *p1 = c2;
3001     }else if (0xc0 <= c2 && c2 <= 0xef) {
3002         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3003 #ifdef NUMCHAR_OPTION
3004         if (ret > 0){
3005             if (p2) *p2 = 0;
3006             if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
3007             ret = 0;
3008         }
3009 #endif
3010     }
3011     return ret;
3012 }
3013
3014 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3015 {
3016     nkf_char ret = 0;
3017     
3018     /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3019     if(ignore_zwnbsp_f){
3020         ignore_zwnbsp_f = FALSE;
3021         if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
3022             return 0;
3023     }
3024     
3025     if (c2 == 0) /* 0x00-0x7f */
3026         c1 &= 0x7F; /* 1byte */
3027     else if (c0 == 0){
3028         if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
3029             /* 2ytes */
3030             if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
3031         }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
3032             return -1; /* 3bytes */
3033 #ifdef __COMMENT__
3034         else if (0xf0 <= c2)
3035             return 0; /* 4,5,6bytes */
3036         else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
3037             return 0; /* trail byte */
3038 #endif
3039         else return 0;
3040     }else{
3041         /* must be 3bytes */
3042         if(c2 == 0xE0){
3043             if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
3044                 return 0;
3045         }else if(c2 == 0xED){
3046             if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
3047                 return 0;
3048         }else if((c2 & 0xf0) == 0xe0){
3049             if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
3050                 return 0;
3051         }else return 0;
3052     }
3053     if (c2 == 0 || c2 == EOF){
3054     } else {
3055         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3056     }
3057     if (ret == 0){
3058         (*oconv)(c2, c1);
3059     }
3060     return ret;
3061 }
3062 #endif
3063
3064 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3065 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3066 {
3067     val &= VALUE_MASK;
3068     if (val < 0x80){
3069         *p2 = val;
3070         *p1 = 0;
3071         *p0 = 0;
3072     }else if (val < 0x800){
3073         *p2 = 0xc0 | (val >> 6);
3074         *p1 = 0x80 | (val & 0x3f);
3075         *p0 = 0;
3076     } else if (val <= NKF_INT32_C(0xFFFF)) {
3077         *p2 = 0xe0 | (val >> 12);
3078         *p1 = 0x80 | ((val >> 6) & 0x3f);
3079         *p0 = 0x80 | (val        & 0x3f);
3080     } else {
3081         *p2 = 0;
3082         *p1 = 0;
3083         *p0 = 0;
3084     }
3085 }
3086 #endif
3087
3088 #ifdef UTF8_INPUT_ENABLE
3089 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3090 {
3091     nkf_char val;
3092     if (c2 >= 0xf0){
3093         val = -1;
3094     }else if (c2 >= 0xe0){
3095         val = (c2 & 0x0f) << 12;
3096         val |= (c1 & 0x3f) << 6;
3097         val |= (c0 & 0x3f);
3098     }else if (c2 >= 0xc0){
3099         val = (c2 & 0x1f) << 6;
3100         val |= (c1 & 0x3f);
3101     }else{
3102         val = c2;
3103     }
3104     return val;
3105 }
3106
3107 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3108 {
3109     nkf_char c2, c1, c0;
3110     nkf_char ret = 0;
3111     val &= VALUE_MASK;
3112     if (val < 0x80){
3113         *p2 = 0;
3114         *p1 = val;
3115     }else{
3116         w16w_conv(val, &c2, &c1, &c0);
3117         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3118 #ifdef NUMCHAR_OPTION
3119         if (ret > 0){
3120             *p2 = 0;
3121             *p1 = CLASS_UTF16 | val;
3122             ret = 0;
3123         }
3124 #endif
3125     }
3126     return ret;
3127 }
3128 #endif
3129
3130 #ifdef UTF8_INPUT_ENABLE
3131 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3132 {
3133     nkf_char ret;
3134
3135     /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3136     if(ignore_zwnbsp_f){
3137         ignore_zwnbsp_f = FALSE;
3138         if (c2==0376 && c1==0377){
3139             utf16_mode = UTF16BE_INPUT;
3140             return 0;
3141         }else if(c2==0377 && c1==0376){
3142             utf16_mode = UTF16LE_INPUT;
3143             return 0;
3144         }
3145     }
3146     if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
3147         nkf_char tmp;
3148         tmp=c1; c1=c2; c2=tmp;
3149     }
3150     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3151         (*oconv)(c2, c1);
3152         return 0;
3153     }else if((c2>>3)==27){ /* surrogate pair */
3154         return 1;
3155     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3156     if (ret) return ret;
3157     (*oconv)(c2, c1);
3158     return 0;
3159 }
3160
3161 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3162 {
3163 #if 0
3164     extern const unsigned short *const utf8_to_euc_2bytes[];
3165     extern const unsigned short *const utf8_to_euc_2bytes_ms[];
3166     extern const unsigned short *const utf8_to_euc_2bytes_932[];
3167     extern const unsigned short *const *const utf8_to_euc_3bytes[];
3168     extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
3169     extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
3170 #endif
3171     const unsigned short *const *pp;
3172     const unsigned short *const *const *ppp;
3173     static const int no_best_fit_chars_table_C2[] =
3174     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3175         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3176         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3177         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3178     static const int no_best_fit_chars_table_C2_ms[] =
3179     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3180         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3181         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3182         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3183     static const int no_best_fit_chars_table_932_C2[] =
3184     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3185         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3186         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3187         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3188     static const int no_best_fit_chars_table_932_C3[] =
3189     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3190         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3191         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3192         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3193     nkf_char ret = 0;
3194
3195     if(c2 < 0x80){
3196         *p2 = 0;
3197         *p1 = c2;
3198     }else if(c2 < 0xe0){
3199         if(no_best_fit_chars_f){
3200             if(ms_ucs_map_f == UCS_MAP_CP932){
3201                 switch(c2){
3202                 case 0xC2:
3203                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3204                     break;
3205                 case 0xC3:
3206                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3207                     break;
3208                 }
3209             }else if(cp51932_f){
3210                 switch(c2){
3211                 case 0xC2:
3212                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3213                     break;
3214                 case 0xC3:
3215                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3216                     break;
3217                 }
3218             }else if(ms_ucs_map_f == UCS_MAP_MS){
3219                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3220             }
3221         }
3222         pp =
3223             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3224             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3225             utf8_to_euc_2bytes;
3226         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3227     }else if(c0 < 0xF0){
3228         if(no_best_fit_chars_f){
3229             if(ms_ucs_map_f == UCS_MAP_CP932){
3230                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3231             }else if(ms_ucs_map_f == UCS_MAP_MS){
3232                 switch(c2){
3233                 case 0xE2:
3234                     switch(c1){
3235                     case 0x80:
3236                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3237                         break;
3238                     case 0x88:
3239                         if(c0 == 0x92) return 1;
3240                         break;
3241                     }
3242                     break;
3243                 case 0xE3:
3244                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3245                     break;
3246                 }
3247             }else{
3248                 switch(c2){
3249                 case 0xE2:
3250                     switch(c1){
3251                     case 0x80:
3252                         if(c0 == 0x95) return 1;
3253                         break;
3254                     case 0x88:
3255                         if(c0 == 0xA5) return 1;
3256                         break;
3257                     }
3258                     break;
3259                 case 0xEF:
3260                     switch(c1){
3261                     case 0xBC:
3262                         if(c0 == 0x8D) return 1;
3263                         break;
3264                     case 0xBD:
3265                         if(c0 == 0x9E && cp51932_f) return 1;
3266                         break;
3267                     case 0xBF:
3268                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3269                         break;
3270                     }
3271                     break;
3272                 }
3273             }
3274         }
3275         ppp =
3276             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3277             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3278             utf8_to_euc_3bytes;
3279         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3280     }else return -1;
3281 #ifdef SHIFTJIS_CP932
3282     if (!ret && cp51932_f && is_eucg3(*p2)) {
3283         nkf_char s2, s1;
3284         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3285             s2e_conv(s2, s1, p2, p1);
3286         }else{
3287             ret = 1;
3288         }
3289     }
3290 #endif
3291     return ret;
3292 }
3293
3294 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3295 {
3296     nkf_char c2;
3297     const unsigned short *p;
3298     unsigned short val;
3299
3300     if (pp == 0) return 1;
3301
3302     c1 -= 0x80;
3303     if (c1 < 0 || psize <= c1) return 1;
3304     p = pp[c1];
3305     if (p == 0)  return 1;
3306
3307     c0 -= 0x80;
3308     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3309     val = p[c0];
3310     if (val == 0) return 1;
3311     if (no_cp932ext_f && (
3312         (val>>8) == 0x2D || /* NEC special characters */
3313         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3314         )) return 1;
3315
3316     c2 = val >> 8;
3317    if (val > 0x7FFF){
3318         c2 &= 0x7f;
3319         c2 |= PREFIX_EUCG3;
3320     }
3321     if (c2 == SO) c2 = X0201;
3322     c1 = val & 0x7f;
3323     if (p2) *p2 = c2;
3324     if (p1) *p1 = c1;
3325     return 0;
3326 }
3327
3328 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3329 {
3330     const char *hex = "0123456789ABCDEF";
3331     int shift = 20;
3332     c &= VALUE_MASK;
3333     while(shift >= 0){
3334         if(c >= 1<<shift){
3335             while(shift >= 0){
3336                 (*f)(0, hex[(c>>shift)&0xF]);
3337                 shift -= 4;
3338             }
3339         }else{
3340             shift -= 4;
3341         }
3342     }
3343     return;
3344 }
3345
3346 void encode_fallback_html(nkf_char c)
3347 {
3348     (*oconv)(0, '&');
3349     (*oconv)(0, '#');
3350     c &= VALUE_MASK;
3351     if(c >= NKF_INT32_C(1000000))
3352         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3353     if(c >= NKF_INT32_C(100000))
3354         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3355     if(c >= 10000)
3356         (*oconv)(0, 0x30+(c/10000  )%10);
3357     if(c >= 1000)
3358         (*oconv)(0, 0x30+(c/1000   )%10);
3359     if(c >= 100)
3360         (*oconv)(0, 0x30+(c/100    )%10);
3361     if(c >= 10)
3362         (*oconv)(0, 0x30+(c/10     )%10);
3363     if(c >= 0)
3364         (*oconv)(0, 0x30+ c         %10);
3365     (*oconv)(0, ';');
3366     return;
3367 }
3368
3369 void encode_fallback_xml(nkf_char c)
3370 {
3371     (*oconv)(0, '&');
3372     (*oconv)(0, '#');
3373     (*oconv)(0, 'x');
3374     nkf_each_char_to_hex(oconv, c);
3375     (*oconv)(0, ';');
3376     return;
3377 }
3378
3379 void encode_fallback_java(nkf_char c)
3380 {
3381     const char *hex = "0123456789ABCDEF";
3382     (*oconv)(0, '\\');
3383     c &= VALUE_MASK;
3384     if(!is_unicode_bmp(c)){
3385         (*oconv)(0, 'U');
3386         (*oconv)(0, '0');
3387         (*oconv)(0, '0');
3388         (*oconv)(0, hex[(c>>20)&0xF]);
3389         (*oconv)(0, hex[(c>>16)&0xF]);
3390     }else{
3391         (*oconv)(0, 'u');
3392     }
3393     (*oconv)(0, hex[(c>>12)&0xF]);
3394     (*oconv)(0, hex[(c>> 8)&0xF]);
3395     (*oconv)(0, hex[(c>> 4)&0xF]);
3396     (*oconv)(0, hex[ c     &0xF]);
3397     return;
3398 }
3399
3400 void encode_fallback_perl(nkf_char c)
3401 {
3402     (*oconv)(0, '\\');
3403     (*oconv)(0, 'x');
3404     (*oconv)(0, '{');
3405     nkf_each_char_to_hex(oconv, c);
3406     (*oconv)(0, '}');
3407     return;
3408 }
3409
3410 void encode_fallback_subchar(nkf_char c)
3411 {
3412     c = unicode_subchar;
3413     (*oconv)((c>>8)&0xFF, c&0xFF);
3414     return;
3415 }
3416 #endif
3417
3418 #ifdef UTF8_OUTPUT_ENABLE
3419 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3420 {
3421 #if 0
3422     extern const unsigned short euc_to_utf8_1byte[];
3423     extern const unsigned short *const euc_to_utf8_2bytes[];
3424     extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3425     extern const unsigned short *const x0212_to_utf8_2bytes[];
3426 #endif
3427     const unsigned short *p;
3428
3429     if (c2 == X0201) {
3430         p = euc_to_utf8_1byte;
3431 #ifdef X0212_ENABLE
3432     } else if (is_eucg3(c2)){
3433         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3434             return 0xA6;
3435         }
3436         c2 = (c2&0x7f) - 0x21;
3437         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3438             p = x0212_to_utf8_2bytes[c2];
3439         else
3440             return 0;
3441 #endif
3442     } else {
3443         c2 &= 0x7f;
3444         c2 = (c2&0x7f) - 0x21;
3445         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3446             p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3447         else
3448             return 0;
3449     }
3450     if (!p) return 0;
3451     c1 = (c1 & 0x7f) - 0x21;
3452     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3453         return p[c1];
3454     return 0;
3455 }
3456
3457 void w_oconv(nkf_char c2, nkf_char c1)
3458 {
3459     nkf_char c0;
3460     nkf_char val;
3461     if (c2 == EOF) {
3462         (*o_putc)(EOF);
3463         return;
3464     }
3465
3466     if (unicode_bom_f==2) {
3467         (*o_putc)('\357');
3468         (*o_putc)('\273');
3469         (*o_putc)('\277');
3470         unicode_bom_f=1;
3471     }
3472
3473 #ifdef NUMCHAR_OPTION
3474     if (c2 == 0 && is_unicode_capsule(c1)){
3475         val = c1 & VALUE_MASK;
3476         if (val < 0x80){
3477             (*o_putc)(val);
3478         }else if (val < 0x800){
3479             (*o_putc)(0xC0 | (val >> 6));
3480             (*o_putc)(0x80 | (val & 0x3f));
3481         } else if (val <= NKF_INT32_C(0xFFFF)) {
3482             (*o_putc)(0xE0 | (val >> 12));
3483             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3484             (*o_putc)(0x80 | (val        & 0x3f));
3485         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3486             (*o_putc)(0xE0 | ( val>>18));
3487             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3488             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3489             (*o_putc)(0x80 | ( val      & 0x3f));
3490         }
3491         return;
3492     }
3493 #endif
3494
3495     if (c2 == 0) { 
3496         output_mode = ASCII;
3497         (*o_putc)(c1);
3498     } else if (c2 == ISO8859_1) {
3499         output_mode = ISO8859_1;
3500         (*o_putc)(c1 | 0x080);
3501     } else {
3502         output_mode = UTF8;
3503         val = e2w_conv(c2, c1);
3504         if (val){
3505             w16w_conv(val, &c2, &c1, &c0);
3506             (*o_putc)(c2);
3507             if (c1){
3508                 (*o_putc)(c1);
3509                 if (c0) (*o_putc)(c0);
3510             }
3511         }
3512     }
3513 }
3514
3515 void w_oconv16(nkf_char c2, nkf_char c1)
3516 {
3517     if (c2 == EOF) {
3518         (*o_putc)(EOF);
3519         return;
3520     }    
3521
3522     if (unicode_bom_f==2) {
3523         if (w_oconv16_LE){
3524             (*o_putc)((unsigned char)'\377');
3525             (*o_putc)('\376');
3526         }else{
3527             (*o_putc)('\376');
3528             (*o_putc)((unsigned char)'\377');
3529         }
3530         unicode_bom_f=1;
3531     }
3532
3533     if (c2 == ISO8859_1) {
3534         c2 = 0;
3535         c1 |= 0x80;
3536 #ifdef NUMCHAR_OPTION
3537     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3538         if (is_unicode_bmp(c1)) {
3539             c2 = (c1 >> 8) & 0xff;
3540             c1 &= 0xff;
3541         } else {
3542             c1 &= VALUE_MASK;
3543             if (c1 <= UNICODE_MAX) {
3544                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3545                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3546                 if (w_oconv16_LE){
3547                     (*o_putc)(c2 & 0xff);
3548                     (*o_putc)((c2 >> 8) & 0xff);
3549                     (*o_putc)(c1 & 0xff);
3550                     (*o_putc)((c1 >> 8) & 0xff);
3551                 }else{
3552                     (*o_putc)((c2 >> 8) & 0xff);
3553                     (*o_putc)(c2 & 0xff);
3554                     (*o_putc)((c1 >> 8) & 0xff);
3555                     (*o_putc)(c1 & 0xff);
3556                 }
3557             }
3558             return;
3559         }
3560 #endif
3561     } else if (c2) {
3562         nkf_char val = e2w_conv(c2, c1);
3563         c2 = (val >> 8) & 0xff;
3564         c1 = val & 0xff;
3565     }
3566     if (w_oconv16_LE){
3567         (*o_putc)(c1);
3568         (*o_putc)(c2);
3569     }else{
3570         (*o_putc)(c2);
3571         (*o_putc)(c1);
3572     }
3573 }
3574
3575 #endif
3576
3577 void e_oconv(nkf_char c2, nkf_char c1)
3578 {
3579 #ifdef NUMCHAR_OPTION
3580     if (c2 == 0 && is_unicode_capsule(c1)){
3581         w16e_conv(c1, &c2, &c1);
3582         if (c2 == 0 && is_unicode_capsule(c1)){
3583             if(encode_fallback)(*encode_fallback)(c1);
3584             return;
3585         }
3586     }
3587 #endif
3588     if (c2 == EOF) {
3589         (*o_putc)(EOF);
3590         return;
3591     } else if (c2 == 0) { 
3592         output_mode = ASCII;
3593         (*o_putc)(c1);
3594     } else if (c2 == X0201) {
3595         output_mode = JAPANESE_EUC;
3596         (*o_putc)(SSO); (*o_putc)(c1|0x80);
3597     } else if (c2 == ISO8859_1) {
3598         output_mode = ISO8859_1;
3599         (*o_putc)(c1 | 0x080);
3600 #ifdef X0212_ENABLE
3601     } else if (is_eucg3(c2)){
3602         output_mode = JAPANESE_EUC;
3603 #ifdef SHIFTJIS_CP932
3604         if (cp51932_f){
3605             nkf_char s2, s1;
3606             if (e2s_conv(c2, c1, &s2, &s1) == 0){
3607                 s2e_conv(s2, s1, &c2, &c1);
3608             }
3609         }
3610 #endif
3611         if (c2 == 0) {
3612             output_mode = ASCII;
3613             (*o_putc)(c1);
3614         }else if (is_eucg3(c2)){
3615             if (x0212_f){
3616                 (*o_putc)(0x8f);
3617                 (*o_putc)((c2 & 0x7f) | 0x080);
3618                 (*o_putc)(c1 | 0x080);
3619             }
3620         }else{
3621             (*o_putc)((c2 & 0x7f) | 0x080);
3622             (*o_putc)(c1 | 0x080);
3623         }
3624 #endif
3625     } else {
3626         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
3627             set_iconv(FALSE, 0);
3628             return; /* too late to rescue this char */
3629         }
3630         output_mode = JAPANESE_EUC;
3631         (*o_putc)(c2 | 0x080);
3632         (*o_putc)(c1 | 0x080);
3633     }
3634 }
3635
3636 #ifdef X0212_ENABLE
3637 nkf_char x0212_shift(nkf_char c)
3638 {
3639     nkf_char ret = c;
3640     c &= 0x7f;
3641     if (is_eucg3(ret)){
3642         if (0x75 <= c && c <= 0x7f){
3643             ret = c + (0x109 - 0x75);
3644