OSDN Git Service

* use macros some repeated routines.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B 
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program 
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.  
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30 ** UTF-8 \e$B%5%]!<%H$K$D$$$F\e(B
31 **    \e$B=>Mh$N\e(B nkf \e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9\e(B
32 **    nkf -e \e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G\e(B UTF-8 \e$B$HH=Dj$5$l$l$P!"\e(B
33 **    \e$B$=$N$^$^\e(B euc-jp \e$B$KJQ49$5$l$^$9\e(B
34 **
35 **    \e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#\e(B
36 **    (\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O\e(B)
37 **
38 **    \e$B2?$+LdBj$r8+$D$1$?$i!"\e(B
39 **        E-Mail: furukawa@tcp-ip.or.jp
40 **    \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.102 2006/06/12 16:34:42 naruse Exp $ */
43 #define NKF_VERSION "2.0.7"
44 #define NKF_RELEASE_DATE "2006-06-13"
45 #include "config.h"
46 #include "utf8tbl.h"
47
48 #define COPY_RIGHT \
49     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50     "Copyright (C) 2002-2006 Kono, Furukawa, Naruse, mastodon"
51
52
53 /*
54 **
55 **
56 **
57 ** USAGE:       nkf [flags] [file] 
58 **
59 ** Flags:
60 ** b    Output is buffered             (DEFAULT)
61 ** u    Output is unbuffered
62 **
63 ** t    no operation
64 **
65 ** j    Output code is JIS 7 bit        (DEFAULT SELECT) 
66 ** s    Output code is MS Kanji         (DEFAULT SELECT) 
67 ** e    Output code is AT&T JIS         (DEFAULT SELECT) 
68 ** w    Output code is UTF-8            (DEFAULT SELECT) 
69 ** l    Output code is JIS 7bit and ISO8859-1 Latin-1
70 **
71 ** m    MIME conversion for ISO-2022-JP
72 ** I    Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M    MIME output conversion 
76 **
77 ** r  {de/en}crypt ROT13/47
78 **
79 ** v  display Version
80 **
81 ** T  Text mode output        (for MS-DOS)
82 **
83 ** x    Do not convert X0201 kana into X0208
84 ** Z    Convert X0208 alphabet to ASCII
85 **
86 ** f60  fold option
87 **
88 ** m    MIME decode
89 ** B    try to fix broken JIS, missing Escape
90 ** B[1-9]  broken level
91 **
92 ** O   Output to 'nkf.out' file or last file name
93 ** d   Delete \r in line feed 
94 ** c   Add \r in line feed 
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
97 **
98 **/
99
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
101 #define MSDOS
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
103 #define __WIN32__
104 #endif
105 #endif
106
107 #ifdef PERL_XS
108 #undef OVERWRITE
109 #endif
110
111 #ifndef PERL_XS
112 #include <stdio.h>
113 #endif
114
115 #include <stdlib.h>
116 #include <string.h>
117
118 #if defined(MSDOS) || defined(__OS2__)
119 #include <fcntl.h>
120 #include <io.h>
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
123 #endif
124 #endif
125
126 #ifdef MSDOS
127 #ifdef LSI_C
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
134 #endif
135 #else /* UNIX */
136 #define setbinmode(fp)
137 #endif
138
139 #if defined(__DJGPP__)
140 void  djgpp_setbinmode(FILE *fp)
141 {
142     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
143     int fd, m;
144     fd = fileno(fp);
145     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146     __file_handle_set(fd, m);
147 }
148 #endif
149
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
152 #else /* BSD */
153 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
154 #endif
155
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
158 #define         EASYWIN
159 #ifndef __WIN16__
160 #define __WIN16__
161 #endif
162 #include <windows.h>
163 #endif
164
165 #ifdef OVERWRITE
166 /* added by satoru@isoternet.org */
/* Headers needed only when OVERWRITE is defined: <sys/stat.h> plus
 * whichever header this compiler uses for utime (<utime.h> or
 * <sys/utime.h>). */
167 #if defined(__EMX__)
168 #include <sys/types.h>
169 #endif /* __EMX__ */
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
172 #include <unistd.h>
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
175 #else
176 #include <utime.h>
177 #endif /* __WATCOMC__ */
178 #else /* defined(MSDOS) && !defined(__DJGPP__) */
179 #ifdef __WIN32__
180 #ifdef __BORLANDC__ /* BCC32 */
181 #include <utime.h>
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
189 #include <utime.h>
190 #elif defined(LSI_C) /* LSI C: no utime header is included */
191 #endif /* compiler selection (_MSC_VER ... / __TURBOC__ / LSI_C) */
192 #endif /* __WIN32__ */
193 #endif /* !defined(MSDOS) || defined(__DJGPP__) */
194 #endif /* OVERWRITE */
195
196 #define         FALSE   0
197 #define         TRUE    1
198
199 /* state of output_mode and input_mode  
200
201    c2           0 means ASCII
202                 X0201
203                 ISO8859_1
204                 X0208
205                 EOF      all termination
206    c1           32bit data
207
208  */
209
210 #define         ASCII           0
211 #define         X0208           1
212 #define         X0201           2
213 #define         ISO8859_1       8
214 #define         NO_X0201        3
215 #define         X0212      0x2844
216 #define         X0213_1    0x284F
217 #define         X0213_2    0x2850
218
219 /* Input Assumption */
220
221 #define         JIS_INPUT       4
222 #define         SJIS_INPUT      5
223 #define         LATIN1_INPUT    6
224 #define         FIXED_MIME      7
225 #define         STRICT_MIME     8
226
227 /* MIME ENCODE */
228
229 #define         ISO2022JP       9
230 #define         JAPANESE_EUC   10
231 #define         SHIFT_JIS      11
232
233 #define         UTF8           12
234 #define         UTF8_INPUT     13
235 #define         UTF16BE_INPUT  14
236 #define         UTF16LE_INPUT  15
237
238 #define         WISH_TRUE      15
239
240 /* ASCII CODE */
241
242 #define         BS      0x08
243 #define         TAB     0x09
244 #define         NL      0x0a
245 #define         CR      0x0d
246 #define         ESC     0x1b
247 #define         SPACE   0x20
248 #define         AT      0x40
249 #define         SSP     0xa0
250 #define         DEL     0x7f
251 #define         SI      0x0f
252 #define         SO      0x0e
253 #define         SSO     0x8e
254 #define         SS3     0x8f
255
/*
 * Character-class helpers.
 *
 * These are function-like macros, so every use of the argument is
 * parenthesised: without that, an expression argument such as
 * `c & 0x7f` is re-associated by operator precedence and the test
 * silently gives the wrong answer.
 *
 * The argument is still evaluated more than once; do not pass an
 * expression with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; yields 0 for a non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* True when the second-lowest byte of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
274
/*
 * Byte ranges used by the CP932 handling: 0xFA..0xFC for the CP932 table
 * and 0xED..0xEE for the inverse (CP932INV) table.
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The argument is parenthesised so that expression arguments such as
 * `c & 0xFF` are compared as a whole; it is evaluated twice, so it must
 * not have side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
280
281 #define         HOLD_SIZE       1024
282 #if defined(INT_IS_SHORT)
283 #define         IOBUF_SIZE      2048
284 #else
285 #define         IOBUF_SIZE      16384
286 #endif
287
288 #define         DEFAULT_J       'B'
289 #define         DEFAULT_R       'B'
290
291 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
292 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
293
294 #define         RANGE_NUM_MAX   18
295 #define         GETA1   0x22
296 #define         GETA2   0x2e
297
298
299 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
300 #define sizeof_euc_to_utf8_1byte 94
301 #define sizeof_euc_to_utf8_2bytes 94
302 #define sizeof_utf8_to_euc_C2 64
303 #define sizeof_utf8_to_euc_E5B8 64
304 #define sizeof_utf8_to_euc_2bytes 112
305 #define sizeof_utf8_to_euc_3bytes 16
306 #endif
307
308 /* MIME preprocessor */
309
310 #ifdef EASYWIN /*Easy Win */
311 extern POINT _BufferSize;
312 #endif
313
/*
 * Description of one input encoding known to the filter.  Instances live
 * in input_code_list[], which pairs each encoding name with a status
 * callback and an input converter.
 *
 * NOTE(review): the exact meaning of stat/score/index/buf is defined by
 * the status_* helpers, whose bodies are not visible in this part of the
 * file; the field notes below are hedged accordingly.
 */
314 struct input_code{
315     char *name;             /* encoding label, e.g. "EUC-JP"; 0 in the list terminator */
316     nkf_char stat;          /* presumably the per-encoding state -- confirm in *_status() */
317     nkf_char score;         /* presumably detection score bits (see set_code_score/clr_code_score) */
318     nkf_char index;         /* presumably the number of entries used in buf[] -- confirm */
319     nkf_char buf[3];        /* small per-encoding buffer of up to three nkf_char values */
320     void (*status_func)(struct input_code *, nkf_char);   /* status callback, e.g. e_status */
321     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);  /* input converter, e.g. e_iconv */
322     int _file_stat;         /* purpose not evident from this part of the file */
323 };
324
325 static char *input_codename = "";
326
327 #ifndef PERL_XS
328 static const char *CopyRight = COPY_RIGHT;
329 #endif
330 #if !defined(PERL_XS) && !defined(WIN32DLL)
331 static  nkf_char     noconvert(FILE *f);
332 #endif
333 static  void    module_connection(void);
334 static  nkf_char     kanji_convert(FILE *f);
335 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
336 static  nkf_char     push_hold_buf(nkf_char c2);
337 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
338 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
339 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
340 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
341 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
342 /* UCS Mapping
343  * 0: Shift_JIS, eucJP-ascii
344  * 1: eucJP-ms
345  * 2: CP932, CP51932
346  */
347 #define UCS_MAP_ASCII 0
348 #define UCS_MAP_MS    1
349 #define UCS_MAP_CP932 2
350 static int ms_ucs_map_f = UCS_MAP_ASCII;
351 #endif
352 #ifdef UTF8_INPUT_ENABLE
353 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
354 static  int     no_cp932ext_f = FALSE;
355 /* ignore ZERO WIDTH NO-BREAK SPACE */
356 static  int     ignore_zwnbsp_f = TRUE;
357 static  int     no_best_fit_chars_f = FALSE;
358 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
359 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
360 static  void    encode_fallback_html(nkf_char c);
361 static  void    encode_fallback_xml(nkf_char c);
362 static  void    encode_fallback_java(nkf_char c);
363 static  void    encode_fallback_perl(nkf_char c);
364 static  void    encode_fallback_subchar(nkf_char c);
365 static  void    (*encode_fallback)(nkf_char c) = NULL;
366 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
367 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
368 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
369 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
370 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
371 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
372 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
373 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
374 #endif
375 #ifdef UTF8_OUTPUT_ENABLE
376 static  int     unicode_bom_f= 0;   /* Output Unicode BOM */
377 static  int     w_oconv16_LE = 0;   /* utf-16 little endian */
378 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
379 static  void    w_oconv(nkf_char c2,nkf_char c1);
380 static  void    w_oconv16(nkf_char c2,nkf_char c1);
381 #endif
382 static  void    e_oconv(nkf_char c2,nkf_char c1);
383 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
384 static  void    s_oconv(nkf_char c2,nkf_char c1);
385 static  void    j_oconv(nkf_char c2,nkf_char c1);
386 static  void    fold_conv(nkf_char c2,nkf_char c1);
387 static  void    cr_conv(nkf_char c2,nkf_char c1);
388 static  void    z_conv(nkf_char c2,nkf_char c1);
389 static  void    rot_conv(nkf_char c2,nkf_char c1);
390 static  void    hira_conv(nkf_char c2,nkf_char c1);
391 static  void    base64_conv(nkf_char c2,nkf_char c1);
392 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
393 static  void    no_connection(nkf_char c2,nkf_char c1);
394 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
395
396 static  void    code_score(struct input_code *ptr);
397 static  void    code_status(nkf_char c);
398
399 static  void    std_putc(nkf_char c);
400 static  nkf_char     std_getc(FILE *f);
401 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
402
403 static  nkf_char     broken_getc(FILE *f);
404 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
405
406 static  nkf_char     mime_begin(FILE *f);
407 static  nkf_char     mime_getc(FILE *f);
408 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
409
410 static  void    switch_mime_getc(void);
411 static  void    unswitch_mime_getc(void);
412 static  nkf_char     mime_begin_strict(FILE *f);
413 static  nkf_char     mime_getc_buf(FILE *f);
414 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
415 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
416
417 static  nkf_char     base64decode(nkf_char c);
418 static  void    mime_prechar(nkf_char c2, nkf_char c1);
419 static  void    mime_putc(nkf_char c);
420 static  void    open_mime(nkf_char c);
421 static  void    close_mime(void);
422 static  void    eof_mime(void);
423 static  void    mimeout_addchar(nkf_char c);
424 #ifndef PERL_XS
425 static  void    usage(void);
426 static  void    version(void);
427 #endif
428 static  void    options(unsigned char *c);
429 #if defined(PERL_XS) || defined(WIN32DLL)
430 static  void    reinit(void);
431 #endif
432
433 /* buffers */
434
435 #if !defined(PERL_XS) && !defined(WIN32DLL)
436 static unsigned char   stdibuf[IOBUF_SIZE];
437 static unsigned char   stdobuf[IOBUF_SIZE];
438 #endif
439 static unsigned char   hold_buf[HOLD_SIZE*2];
440 static int             hold_count;
441
442 /* MIME preprocessor fifo */
443
444 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
445 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)   
446 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
447 static unsigned char           mime_buf[MIME_BUF_SIZE];
448 static unsigned int            mime_top = 0;
449 static unsigned int            mime_last = 0;  /* decoded */
450 static unsigned int            mime_input = 0; /* undecoded */
451 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
452
453 /* flags: file-scope option switches; several are tested in main() */
454 static int             unbuf_f = FALSE;        /* stdout is unbuffered when set */
455 static int             estab_f = FALSE;
456 static int             nop_f = FALSE;          /* pass input to noconvert() instead of converting */
457 static int             binmode_f = TRUE;       /* binary mode */
458 static int             rot_f = FALSE;          /* ROT13/47 mode */
459 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
460 static int             input_f = FALSE;        /* non fixed input code  */
461 static int             alpha_f = FALSE;        /* convert JIS X0208 alphabet to ASCII */
462 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
463 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
464 static int             mimebuf_f = FALSE;      /* MIME buffered input */
465 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
466 static int             iso8859_f = FALSE;      /* ISO8859 through */
467 static int             mimeout_f = FALSE;       /* base64 mode */
468 #if defined(MSDOS) || defined(__OS2__) 
469 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
470 #else
471 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
472 #endif
473 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
474
475 #ifdef UNICODE_NORMALIZATION
/* Switch and input hooks for the UNICODE_NORMALIZATION build option;
 * both hooks start out pointing at std_getc/std_ungetc. */
476 static int nfc_f = FALSE;
477 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
478 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
479 static nkf_char nfc_getc(FILE *f);
480 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
481 #endif
482
483 #ifdef INPUT_OPTION
484 static int cap_f = FALSE;
485 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
486 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
487 static nkf_char cap_getc(FILE *f);
488 static nkf_char cap_ungetc(nkf_char c,FILE *f);
489
490 static int url_f = FALSE;
491 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
492 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
493 static nkf_char url_getc(FILE *f);
494 static nkf_char url_ungetc(nkf_char c,FILE *f);
495 #endif
496
/*
 * NKF_INT32_C(n): spell a constant that needs more than 16 bits; when
 * INT_IS_SHORT is defined the literal gets an `L` suffix.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/*
 * A value is split into a class (top 8 bits, CLASS_MASK) and a payload
 * (low 24 bits, VALUE_MASK).  Class CLASS_UTF16 marks a "unicode capsule".
 */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UTF16     NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The argument is parenthesised because `&` binds tighter than `|`:
 * without it, is_unicode_bmp(CLASS_UTF16 | x) would mask only `x` and
 * then compare the class bits as well.
 */
/* True when the class bits of c equal CLASS_UTF16. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UTF16)
/* True when the 24-bit payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
509
510 #ifdef NUMCHAR_OPTION
/* Switch and input hooks for the NUMCHAR_OPTION build option;
 * both hooks start out pointing at std_getc/std_ungetc. */
511 static int numchar_f = FALSE;
512 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc (numchar_getc) */
513 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
514 static nkf_char numchar_getc(FILE *f);
515 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
516 #endif
517
518 #ifdef CHECK_OPTION
519 static int noout_f = FALSE;
520 static void no_putc(nkf_char c);
521 static nkf_char debug_f = FALSE;
522 static void debug(const char *str);
523 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
524 #endif
525
526 static int guess_f = FALSE;
527 #if !defined PERL_XS
528 static  void    print_guessed_code(char *filename);
529 #endif
530 static  void    set_input_codename(char *codename);
531 static int is_inputcode_mixed = FALSE;
532 static int is_inputcode_set   = FALSE;
533
534 #ifdef EXEC_IO
535 static int exec_f = 0;
536 #endif
537
538 #ifdef SHIFTJIS_CP932
539 /* invert IBM extended characters to others */
540 static int cp51932_f = TRUE;
541
542 /* invert NEC-selected IBM extended characters to IBM extended characters */
543 static int cp932inv_f = TRUE;
544
545 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
546 #endif /* SHIFTJIS_CP932 */
547
548 #ifdef X0212_ENABLE
549 static int x0212_f = FALSE;
550 static nkf_char x0212_shift(nkf_char c);
551 static nkf_char x0212_unshift(nkf_char c);
552 #endif
553 static int x0213_f = FALSE;
554
555 static unsigned char prefix_table[256];
556
557 static void set_code_score(struct input_code *ptr, nkf_char score);
558 static void clr_code_score(struct input_code *ptr, nkf_char score);
559 static void status_disable(struct input_code *ptr);
560 static void status_push_ch(struct input_code *ptr, nkf_char c);
561 static void status_clear(struct input_code *ptr);
562 static void status_reset(struct input_code *ptr);
563 static void status_reinit(struct input_code *ptr);
564 static void status_check(struct input_code *ptr, nkf_char c);
565 static void e_status(struct input_code *, nkf_char);
566 static void s_status(struct input_code *, nkf_char);
567
568 #ifdef UTF8_INPUT_ENABLE
569 static void w_status(struct input_code *, nkf_char);
570 static void w16_status(struct input_code *, nkf_char);
571 static int             utf16_mode = UTF16BE_INPUT;
572 #endif
573
/*
 * Table of the input encodings known to the filter.  Each row gives the
 * encoding name, zeroed stat/score/index/buf fields, the status callback
 * and the input converter for that encoding.  The UTF-8 and UTF-16 rows
 * exist only when UTF8_INPUT_ENABLE is defined.  The list ends with an
 * all-zero entry (name == 0).
 */
574 struct input_code input_code_list[] = {
575     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
576     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
577 #ifdef UTF8_INPUT_ENABLE
578     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
579     {"UTF-16",    0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
580 #endif
581     {0}  /* terminator */
582 };
583
584 static int              mimeout_mode = 0;
585 static int              base64_count = 0;
586
587 /* X0208 -> ASCII converter */
588
589 /* fold parameter */
590 static int             f_line = 0;    /* chars in line */
591 static int             f_prev = 0;
592 static int             fold_preserve_f = FALSE; /* preserve new lines */
593 static int             fold_f  = FALSE;
594 static int             fold_len  = 0;
595
596 /* options */
597 static unsigned char   kanji_intro = DEFAULT_J;
598 static unsigned char   ascii_intro = DEFAULT_R;
599
600 /* Folding */
601
602 #define FOLD_MARGIN  10
603 #define DEFAULT_FOLD 60
604
605 static int             fold_margin  = FOLD_MARGIN;
606
607 /* converters */
608
609 #ifdef DEFAULT_CODE_JIS
610 #   define  DEFAULT_CONV j_oconv
611 #endif
612 #ifdef DEFAULT_CODE_SJIS
613 #   define  DEFAULT_CONV s_oconv
614 #endif
615 #ifdef DEFAULT_CODE_EUC
616 #   define  DEFAULT_CONV e_oconv
617 #endif
618 #ifdef DEFAULT_CODE_UTF8
619 #   define  DEFAULT_CONV w_oconv
620 #endif
621
622 /* process default */
623 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
624
625 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
626 /* s_iconv or oconv */
627 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
628
629 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
630 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
631 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
632 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
633 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
634 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
635 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
636
637 /* static redirections */
638
639 static  void   (*o_putc)(nkf_char c) = std_putc;
640
641 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
642 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
643
644 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc (presumably broken_getc -- confirm) */
645 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
646
647 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
648
649 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
650 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
651
652 /* for strict mime */
653 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
654 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
655
656 /* Global states */
657 static int output_mode = ASCII,    /* output kanji mode */
658            input_mode =  ASCII,    /* input kanji mode */
659            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
660 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
661
662 /* X0201 / X0208 conversion tables */
663
664 /* X0201 kana conversion table.
     * 130 bytes: read as byte pairs this is 64 pairs followed by a final
     * 0x00,0x00 pair.
     * NOTE(review): 64 pairs matches the A0-DF span of the range note
     * below (64 codes) but not 90-9F plus A0-DF (80 codes); the lookup
     * code is not visible here -- confirm the indexing before relying on
     * the range note. */
665 /* 90-9F A0-DF */
666 static const
667 unsigned char cv[]= {
668     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
669     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
670     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
671     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
672     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
673     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
674     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
675     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
676     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
677     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
678     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
679     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
680     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
681     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
682     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
683     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
684     0x00,0x00};
685
686
687 /* X0201 kana conversion table for dakuten (voiced sound mark).
     * Same size and layout as cv[]; a 0x00,0x00 pair marks a position
     * that has no entry in this table. */
688 /* 90-9F A0-DF */
689 static const
690 unsigned char dv[]= { 
691     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
692     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
696     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
697     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
698     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
699     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
700     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
701     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
702     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
703     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
704     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
705     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
706     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
707     0x00,0x00};
708
709 /* X0201 kana conversion table for handakuten (semi-voiced sound mark).
     * Same size and layout as cv[]; only five positions are non-zero. */
710 /* 90-9F A0-DF */
711 static const
712 unsigned char ev[]= { 
713     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
714     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
716     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
717     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
718     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
719     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
720     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
724     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
725     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729     0x00,0x00};
730
731
732 /* X0208 kigou (symbol) conversion table.
     * 96 single-byte entries; every non-zero entry is a printable ASCII
     * code, and 0x00 presumably means "no ASCII equivalent" (confirm
     * against the lookup code, which is not visible here). */
733 /* 0x8140 - 0x819e */
734 static const
735 unsigned char fv[] = {
736
737     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
738     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
739     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
740     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
741     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
742     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
743     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
744     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
745     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
746     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
747     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
748     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
749 } ;
750
751
752 #define    CRLF      1
753
754 static int             file_out_f = FALSE;
755 #ifdef OVERWRITE
756 static int             overwrite_f = FALSE;
757 static int             preserve_time_f = FALSE;
758 static int             backup_f = FALSE;
759 static char            *backup_suffix = "";
760 static char *get_backup_filename(const char *suffix, const char *filename);
761 #endif
762
763 static int             crmode_f = 0;   /* CR, NL, CRLF */
764 #ifdef EASYWIN /*Easy Win */
765 static int             end_check;
766 #endif /*Easy Win */
767
768 #define STD_GC_BUFSIZE (256)
769 nkf_char std_gc_buf[STD_GC_BUFSIZE];
770 nkf_char std_gc_ndx;
771
772 #ifdef WIN32DLL
773 #include "nkf32dll.c"
774 #elif defined(PERL_XS)
775 #else /* WIN32DLL */
776 int main(int argc, char **argv)
777 {
778     FILE  *fin;
779     unsigned char  *cp;
780
781     char *outfname = NULL;
782     char *origfname;
783
784 #ifdef EASYWIN /*Easy Win */
785     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
786 #endif
787
788     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
789         cp = (unsigned char *)*argv;
790         options(cp);
791 #ifdef EXEC_IO
792         if (exec_f){
793             int fds[2], pid;
794             if (pipe(fds) < 0 || (pid = fork()) < 0){
795                 abort();
796             }
797             if (pid == 0){
798                 if (exec_f > 0){
799                     close(fds[0]);
800                     dup2(fds[1], 1);
801                 }else{
802                     close(fds[1]);
803                     dup2(fds[0], 0);
804                 }
805                 execvp(argv[1], &argv[1]);
806             }
807             if (exec_f > 0){
808                 close(fds[1]);
809                 dup2(fds[0], 0);
810             }else{
811                 close(fds[0]);
812                 dup2(fds[1], 1);
813             }
814             argc = 0;
815             break;
816         }
817 #endif
818     }
819     if(x0201_f == WISH_TRUE)
820          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
821
822     if (binmode_f == TRUE)
823 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
824     if (freopen("","wb",stdout) == NULL) 
825         return (-1);
826 #else
827     setbinmode(stdout);
828 #endif
829
830     if (unbuf_f)
831       setbuf(stdout, (char *) NULL);
832     else
833       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
834
835     if (argc == 0) {
836       if (binmode_f == TRUE)
837 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
838       if (freopen("","rb",stdin) == NULL) return (-1);
839 #else
840       setbinmode(stdin);
841 #endif
842       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
843       if (nop_f)
844           noconvert(stdin);
845       else {
846           kanji_convert(stdin);
847           if (guess_f) print_guessed_code(NULL);
848       }
849     } else {
850       int nfiles = argc;
851       while (argc--) {
852             is_inputcode_mixed = FALSE;
853             is_inputcode_set   = FALSE;
854             input_codename = "";
855 #ifdef CHECK_OPTION
856             iconv_for_check = 0;
857 #endif
858           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
859               perror(*--argv);
860               return(-1);
861           } else {
862 #ifdef OVERWRITE
863               int fd = 0;
864               int fd_backup = 0;
865 #endif
866
867 /* reopen file for stdout */
868               if (file_out_f == TRUE) {
869 #ifdef OVERWRITE
870                   if (overwrite_f){
871                       outfname = malloc(strlen(origfname)
872                                         + strlen(".nkftmpXXXXXX")
873                                         + 1);
874                       if (!outfname){
875                           perror(origfname);
876                           return -1;
877                       }
878                       strcpy(outfname, origfname);
879 #ifdef MSDOS
880                       {
881                           int i;
882                           for (i = strlen(outfname); i; --i){
883                               if (outfname[i - 1] == '/'
884                                   || outfname[i - 1] == '\\'){
885                                   break;
886                               }
887                           }
888                           outfname[i] = '\0';
889                       }
890                       strcat(outfname, "ntXXXXXX");
891                       mktemp(outfname);
892                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
893                                 S_IREAD | S_IWRITE);
894 #else
895                       strcat(outfname, ".nkftmpXXXXXX");
896                       fd = mkstemp(outfname);
897 #endif
898                       if (fd < 0
899                           || (fd_backup = dup(fileno(stdout))) < 0
900                           || dup2(fd, fileno(stdout)) < 0
901                           ){
902                           perror(origfname);
903                           return -1;
904                       }
905                   }else
906 #endif
907                   if(argc == 1 ) {
908                       outfname = *argv++;
909                       argc--;
910                   } else {
911                       outfname = "nkf.out";
912                   }
913
914                   if(freopen(outfname, "w", stdout) == NULL) {
915                       perror (outfname);
916                       return (-1);
917                   }
918                   if (binmode_f == TRUE) {
919 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
920                       if (freopen("","wb",stdout) == NULL) 
921                            return (-1);
922 #else
923                       setbinmode(stdout);
924 #endif
925                   }
926               }
927               if (binmode_f == TRUE)
928 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
929                  if (freopen("","rb",fin) == NULL) 
930                     return (-1);
931 #else
932                  setbinmode(fin);
933 #endif 
934               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
935               if (nop_f)
936                   noconvert(fin);
937               else {
938                   char *filename = NULL;
939                   kanji_convert(fin);
940                   if (nfiles > 1) filename = origfname;
941                   if (guess_f) print_guessed_code(filename);
942               }
943               fclose(fin);
944 #ifdef OVERWRITE
945               if (overwrite_f) {
946                   struct stat     sb;
947 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
948                   time_t tb[2];
949 #else
950                   struct utimbuf  tb;
951 #endif
952
953                   fflush(stdout);
954                   close(fd);
955                   if (dup2(fd_backup, fileno(stdout)) < 0){
956                       perror("dup2");
957                   }
958                   if (stat(origfname, &sb)) {
959                       fprintf(stderr, "Can't stat %s\n", origfname);
960                   }
961                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
962                   if (chmod(outfname, sb.st_mode)) {
963                       fprintf(stderr, "Can't set permission %s\n", outfname);
964                   }
965
966                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
967                     if(preserve_time_f){
968 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
969                         tb[0] = tb[1] = sb.st_mtime;
970                         if (utime(outfname, tb)) {
971                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
972                         }
973 #else
974                         tb.actime  = sb.st_atime;
975                         tb.modtime = sb.st_mtime;
976                         if (utime(outfname, &tb)) {
977                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
978                         }
979 #endif
980                     }
981                     if(backup_f){
982                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
983 #ifdef MSDOS
984                         unlink(backup_filename);
985 #endif
986                         if (rename(origfname, backup_filename)) {
987                             perror(backup_filename);
988                             fprintf(stderr, "Can't rename %s to %s\n",
989                                     origfname, backup_filename);
990                         }
991                     }else{
992 #ifdef MSDOS
993                         if (unlink(origfname)){
994                             perror(origfname);
995                         }
996 #endif
997                     }
998                   if (rename(outfname, origfname)) {
999                       perror(origfname);
1000                       fprintf(stderr, "Can't rename %s to %s\n",
1001                               outfname, origfname);
1002                   }
1003                   free(outfname);
1004               }
1005 #endif
1006           }
1007       }
1008     }
1009 #ifdef EASYWIN /*Easy Win */
1010     if (file_out_f == FALSE) 
1011         scanf("%d",&end_check);
1012     else 
1013         fclose(stdout);
1014 #else /* for Other OS */
1015     if (file_out_f == TRUE) 
1016         fclose(stdout);
1017 #endif /*Easy Win */
1018     return (0);
1019 }
1020 #endif /* WIN32DLL */
1021
1022 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the user-supplied `suffix`.
 *
 * When `suffix` contains one or more '*' characters it is used as a
 * template: every '*' is replaced by `filename` and all other characters
 * are copied as they are.  When it contains no '*', `suffix` is simply
 * appended to `filename`.
 *
 * Returns a freshly malloc'ed, NUL-terminated string that the caller must
 * free, or NULL (after reporting through perror) if the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t i, j;
    size_t filename_length = strlen(filename);
    size_t suffix_length = strlen(suffix);

    for(i = 0; suffix[i]; i++){
        if(suffix[i] == '*') asterisk_count++;
    }

    if(asterisk_count){
        /* each one-byte '*' is dropped and filename_length bytes take its place */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
        if (!backup_filename){
            perror("Can't malloc backup filename.");
            return NULL;
        }

        for(i = 0, j = 0; suffix[i]; i++){
            if(suffix[i] == '*'){
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            }else{
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    }else{
        /* room for filename, suffix and the terminating NUL */
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename){
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the suffix's terminating NUL */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1061 #endif
1062
/*
 * Long ("--name") option table consulted by options().
 *
 *   name   text expected after "--".  The comparison in options() stops at
 *          a '=' in the name, and the text following it is handed to the
 *          option's handler as its value.
 *   alias  when non-empty, a string of short option letters that options()
 *          processes in place of the long option; when empty, options()
 *          handles the entry itself by comparing the name.
 *
 * options() scans the table from the top and takes the first match.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",                "" },
    { "oc=",                "" },
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess",              "g" },
    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/*
 * Becomes 1 once options() has seen a bare "--"; from then on every call
 * to options() returns immediately without examining its argument.
 */
static int option_mode = 0;
1144
1145 void options(unsigned char *cp)
1146 {
1147     nkf_char i, j;
1148     unsigned char *p;
1149     unsigned char *cp_back = NULL;
1150     char codeset[32];
1151
1152     if (option_mode==1)
1153         return;
1154     while(*cp && *cp++!='-');
1155     while (*cp || cp_back) {
1156         if(!*cp){
1157             cp = cp_back;
1158             cp_back = NULL;
1159             continue;
1160         }
1161         p = 0;
1162         switch (*cp++) {
1163         case '-':  /* literal options */
1164             if (!*cp || *cp == SPACE) {        /* ignore the rest of arguments */
1165                 option_mode = 1;
1166                 return;
1167             }
1168             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1169                 p = (unsigned char *)long_option[i].name;
1170                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1171                 if (*p == cp[j] || cp[j] == ' '){
1172                     p = &cp[j] + 1;
1173                     break;
1174                 }
1175                 p = 0;
1176             }
1177             if (p == 0) return;
1178             while(*cp && *cp != SPACE && cp++);
1179             if (long_option[i].alias[0]){
1180                 cp_back = cp;
1181                 cp = (unsigned char *)long_option[i].alias;
1182             }else{
1183                 if (strcmp(long_option[i].name, "ic=") == 0){
1184                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1185                         codeset[i] = nkf_toupper(p[i]);
1186                     }
1187                     codeset[i] = 0;
1188                     if(strcmp(codeset, "ISO-2022-JP") == 0 ||
1189                       strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1190                       strcmp(codeset, "CP50220") == 0 ||
1191                       strcmp(codeset, "CP50221") == 0 ||
1192                       strcmp(codeset, "CP50222") == 0 ||
1193                       strcmp(codeset, "ISO-2022-JP-MS") == 0){
1194                         input_f = JIS_INPUT;
1195                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1196                         input_f = JIS_INPUT;
1197 #ifdef X0212_ENABLE
1198                         x0212_f = TRUE;
1199 #endif
1200                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1201                         input_f = JIS_INPUT;
1202 #ifdef X0212_ENABLE
1203                         x0212_f = TRUE;
1204 #endif
1205                         x0213_f = TRUE;
1206                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1207                         input_f = SJIS_INPUT;
1208                         if (x0201_f==NO_X0201) x0201_f=TRUE;
1209                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1210                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1211                              strcmp(codeset, "CP932") == 0 ||
1212                              strcmp(codeset, "MS932") == 0){
1213                         input_f = SJIS_INPUT;
1214                         x0201_f = FALSE;
1215 #ifdef SHIFTJIS_CP932
1216                         cp51932_f = TRUE;
1217 #endif
1218 #ifdef UTF8_OUTPUT_ENABLE
1219                         ms_ucs_map_f = UCS_MAP_CP932;
1220 #endif
1221                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1222                              strcmp(codeset, "EUC-JP") == 0){
1223                         input_f = JIS_INPUT;
1224                     }else if(strcmp(codeset, "CP51932") == 0){
1225                         input_f = JIS_INPUT;
1226                         x0201_f = FALSE;
1227 #ifdef SHIFTJIS_CP932
1228                         cp51932_f = TRUE;
1229 #endif
1230 #ifdef UTF8_OUTPUT_ENABLE
1231                         ms_ucs_map_f = UCS_MAP_CP932;
1232 #endif
1233                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1234                              strcmp(codeset, "EUCJP-MS") == 0 ||
1235                              strcmp(codeset, "EUCJPMS") == 0){
1236                         input_f = JIS_INPUT;
1237                         x0201_f = FALSE;
1238 #ifdef SHIFTJIS_CP932
1239                         cp51932_f = FALSE;
1240 #endif
1241 #ifdef UTF8_OUTPUT_ENABLE
1242                         ms_ucs_map_f = UCS_MAP_MS;
1243 #endif
1244                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1245                              strcmp(codeset, "EUCJP-ASCII") == 0){
1246                         input_f = JIS_INPUT;
1247                         x0201_f = FALSE;
1248 #ifdef SHIFTJIS_CP932
1249                         cp51932_f = FALSE;
1250 #endif
1251 #ifdef UTF8_OUTPUT_ENABLE
1252                         ms_ucs_map_f = UCS_MAP_ASCII;
1253 #endif
1254                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1255                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1256                         input_f = SJIS_INPUT;
1257                         x0213_f = TRUE;
1258 #ifdef SHIFTJIS_CP932
1259                         cp51932_f = FALSE;
1260                         cp932inv_f = FALSE;
1261 #endif
1262                         if (x0201_f==NO_X0201) x0201_f=TRUE;
1263                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1264                              strcmp(codeset, "EUC-JIS-2004") == 0){
1265                         input_f = JIS_INPUT;
1266                         x0201_f = FALSE;
1267                         x0213_f = TRUE;
1268 #ifdef SHIFTJIS_CP932
1269                         cp51932_f = FALSE;
1270                         cp932inv_f = FALSE;
1271 #endif
1272 #ifdef UTF8_INPUT_ENABLE
1273                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1274                              strcmp(codeset, "UTF-8N") == 0 ||
1275                              strcmp(codeset, "UTF-8-BOM") == 0){
1276                         input_f = UTF8_INPUT;
1277 #ifdef UNICODE_NORMALIZATION
1278                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1279                              strcmp(codeset, "UTF-8-MAC") == 0){
1280                         input_f = UTF8_INPUT;
1281                         nfc_f = TRUE;
1282 #endif
1283                     }else if(strcmp(codeset, "UTF-16") == 0){
1284                         input_f = UTF16BE_INPUT;
1285                         utf16_mode = UTF16BE_INPUT;
1286                     }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1287                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1288                         input_f = UTF16BE_INPUT;
1289                         utf16_mode = UTF16BE_INPUT;
1290                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1291                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1292                         input_f = UTF16LE_INPUT;
1293                         utf16_mode = UTF16LE_INPUT;
1294 #endif
1295                     }
1296                     continue;
1297                 }
1298                 if (strcmp(long_option[i].name, "oc=") == 0){
1299                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1300                         codeset[i] = nkf_toupper(p[i]);
1301                     }
1302                     codeset[i] = 0;
1303                     if(strcmp(codeset, "ISO-2022-JP") == 0 ||
1304                        strcmp(codeset, "CP50220") == 0){
1305                         output_conv = j_oconv;
1306                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1307                         output_conv = j_oconv;
1308                         no_cp932ext_f = TRUE;
1309                     }else if(strcmp(codeset, "CP50221") == 0 ||
1310                              strcmp(codeset, "ISO-2022-JP-MS") == 0){
1311                         output_conv = j_oconv;
1312                         x0201_f = FALSE;
1313                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1314                         output_conv = j_oconv;
1315 #ifdef X0212_ENABLE
1316                         x0212_f = TRUE;
1317 #endif
1318 #ifdef SHIFTJIS_CP932
1319                         cp51932_f = FALSE;
1320 #endif
1321                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1322                         output_conv = j_oconv;
1323 #ifdef X0212_ENABLE
1324                         x0212_f = TRUE;
1325 #endif
1326                         x0213_f = TRUE;
1327 #ifdef SHIFTJIS_CP932
1328                         cp51932_f = FALSE;
1329 #endif
1330                     }else if(strcmp(codeset, "ISO-2022-JP-MS") == 0){
1331                         output_conv = j_oconv;
1332                         x0201_f = FALSE;
1333 #ifdef X0212_ENABLE
1334                         x0212_f = TRUE;
1335 #endif
1336 #ifdef SHIFTJIS_CP932
1337                         cp51932_f = FALSE;
1338 #endif
1339                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1340                         output_conv = s_oconv;
1341                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1342                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1343                              strcmp(codeset, "CP932") == 0 ||
1344                              strcmp(codeset, "MS932") == 0){
1345                         output_conv = s_oconv;
1346                         x0201_f = FALSE;
1347 #ifdef SHIFTJIS_CP932
1348                         cp51932_f = TRUE;
1349                         cp932inv_f = TRUE;
1350 #endif
1351 #ifdef UTF8_OUTPUT_ENABLE
1352                         ms_ucs_map_f = UCS_MAP_CP932;
1353 #endif
1354                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1355                              strcmp(codeset, "EUC-JP") == 0){
1356                         output_conv = e_oconv;
1357                     }else if(strcmp(codeset, "CP51932") == 0){
1358                         output_conv = e_oconv;
1359                         x0201_f = FALSE;
1360 #ifdef SHIFTJIS_CP932
1361                         cp51932_f = TRUE;
1362 #endif
1363 #ifdef UTF8_OUTPUT_ENABLE
1364                         ms_ucs_map_f = UCS_MAP_CP932;
1365 #endif
1366                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1367                              strcmp(codeset, "EUCJP-MS") == 0 ||
1368                              strcmp(codeset, "EUCJPMS") == 0){
1369                         output_conv = e_oconv;
1370                         x0201_f = FALSE;
1371 #ifdef X0212_ENABLE
1372                         x0212_f = TRUE;
1373 #endif
1374 #ifdef SHIFTJIS_CP932
1375                         cp51932_f = FALSE;
1376 #endif
1377 #ifdef UTF8_OUTPUT_ENABLE
1378                         ms_ucs_map_f = UCS_MAP_MS;
1379 #endif
1380                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1381                              strcmp(codeset, "EUCJP-ASCII") == 0){
1382                         output_conv = e_oconv;
1383                         x0201_f = FALSE;
1384 #ifdef X0212_ENABLE
1385                         x0212_f = TRUE;
1386 #endif
1387 #ifdef SHIFTJIS_CP932
1388                         cp51932_f = FALSE;
1389 #endif
1390 #ifdef UTF8_OUTPUT_ENABLE
1391                         ms_ucs_map_f = UCS_MAP_ASCII;
1392 #endif
1393                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1394                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1395                         output_conv = s_oconv;
1396                         x0213_f = TRUE;
1397 #ifdef SHIFTJIS_CP932
1398                         cp932inv_f = FALSE;
1399 #endif
1400                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1401                              strcmp(codeset, "EUC-JIS-2004") == 0){
1402                         output_conv = e_oconv;
1403 #ifdef X0212_ENABLE
1404                         x0212_f = TRUE;
1405 #endif
1406                         x0213_f = TRUE;
1407 #ifdef SHIFTJIS_CP932
1408                         cp51932_f = FALSE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1411                     }else if(strcmp(codeset, "UTF-8") == 0){
1412                         output_conv = w_oconv;
1413                     }else if(strcmp(codeset, "UTF-8N") == 0){
1414                         output_conv = w_oconv;
1415                         unicode_bom_f=1;
1416                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1417                         output_conv = w_oconv;
1418                         unicode_bom_f=2;
1419                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1420                         output_conv = w_oconv16; 
1421                         unicode_bom_f=1;
1422                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1423                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1424                         output_conv = w_oconv16; 
1425                         unicode_bom_f=2;
1426                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1427                         output_conv = w_oconv16; 
1428                         w_oconv16_LE = 1;
1429                         unicode_bom_f=1;
1430                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1431                         output_conv = w_oconv16; 
1432                         w_oconv16_LE = 1;
1433                         unicode_bom_f=2;
1434 #endif
1435                     }
1436                     continue;
1437                 }
1438 #ifdef OVERWRITE
1439                 if (strcmp(long_option[i].name, "overwrite") == 0){
1440                     file_out_f = TRUE;
1441                     overwrite_f = TRUE;
1442                     preserve_time_f = TRUE;
1443                     continue;
1444                 }
1445                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1446                     file_out_f = TRUE;
1447                     overwrite_f = TRUE;
1448                     preserve_time_f = TRUE;
1449                     backup_f = TRUE;
1450                     backup_suffix = malloc(strlen((char *) p) + 1);
1451                     strcpy(backup_suffix, (char *) p);
1452                     continue;
1453                 }
1454                 if (strcmp(long_option[i].name, "in-place") == 0){
1455                     file_out_f = TRUE;
1456                     overwrite_f = TRUE;
1457                     preserve_time_f = FALSE;
1458                     continue;
1459                 }
1460                 if (strcmp(long_option[i].name, "in-place=") == 0){
1461                     file_out_f = TRUE;
1462                     overwrite_f = TRUE;
1463                     preserve_time_f = FALSE;
1464                     backup_f = TRUE;
1465                     backup_suffix = malloc(strlen((char *) p) + 1);
1466                     strcpy(backup_suffix, (char *) p);
1467                     continue;
1468                 }
1469 #endif
1470 #ifdef INPUT_OPTION
1471                 if (strcmp(long_option[i].name, "cap-input") == 0){
1472                     cap_f = TRUE;
1473                     continue;
1474                 }
1475                 if (strcmp(long_option[i].name, "url-input") == 0){
1476                     url_f = TRUE;
1477                     continue;
1478                 }
1479 #endif
1480 #ifdef NUMCHAR_OPTION
1481                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1482                     numchar_f = TRUE;
1483                     continue;
1484                 }
1485 #endif
1486 #ifdef CHECK_OPTION
1487                 if (strcmp(long_option[i].name, "no-output") == 0){
1488                     noout_f = TRUE;
1489                     continue;
1490                 }
1491                 if (strcmp(long_option[i].name, "debug") == 0){
1492                     debug_f = TRUE;
1493                     continue;
1494                 }
1495 #endif
1496                 if (strcmp(long_option[i].name, "cp932") == 0){
1497 #ifdef SHIFTJIS_CP932
1498                     cp51932_f = TRUE;
1499                     cp932inv_f = TRUE;
1500 #endif
1501 #ifdef UTF8_OUTPUT_ENABLE
1502                     ms_ucs_map_f = UCS_MAP_CP932;
1503 #endif
1504                     continue;
1505                 }
1506                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1507 #ifdef SHIFTJIS_CP932
1508                     cp51932_f = FALSE;
1509                     cp932inv_f = FALSE;
1510 #endif
1511 #ifdef UTF8_OUTPUT_ENABLE
1512                     ms_ucs_map_f = UCS_MAP_ASCII;
1513 #endif
1514                     continue;
1515                 }
1516 #ifdef SHIFTJIS_CP932
1517                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1518                     cp932inv_f = TRUE;
1519                     continue;
1520                 }
1521 #endif
1522
1523 #ifdef X0212_ENABLE
1524                 if (strcmp(long_option[i].name, "x0212") == 0){
1525                     x0212_f = TRUE;
1526                     continue;
1527                 }
1528 #endif
1529
1530 #ifdef EXEC_IO
1531                   if (strcmp(long_option[i].name, "exec-in") == 0){
1532                       exec_f = 1;
1533                       return;
1534                   }
1535                   if (strcmp(long_option[i].name, "exec-out") == 0){
1536                       exec_f = -1;
1537                       return;
1538                   }
1539 #endif
1540 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1541                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1542                     no_cp932ext_f = TRUE;
1543                     continue;
1544                 }
1545                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1546                     no_best_fit_chars_f = TRUE;
1547                     continue;
1548                 }
1549                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1550                     encode_fallback = NULL;
1551                     continue;
1552                 }
1553                 if (strcmp(long_option[i].name, "fb-html") == 0){
1554                     encode_fallback = encode_fallback_html;
1555                     continue;
1556                 }
1557                 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1558                     encode_fallback = encode_fallback_xml;
1559                     continue;
1560                 }
1561                 if (strcmp(long_option[i].name, "fb-java") == 0){
1562                     encode_fallback = encode_fallback_java;
1563                     continue;
1564                 }
1565                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1566                     encode_fallback = encode_fallback_perl;
1567                     continue;
1568                 }
1569                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1570                     encode_fallback = encode_fallback_subchar;
1571                     continue;
1572                 }
1573                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1574                     encode_fallback = encode_fallback_subchar;
1575                     unicode_subchar = 0;
1576                     if (p[0] != '0'){
1577                         /* decimal number */
1578                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1579                             unicode_subchar *= 10;
1580                             unicode_subchar += hex2bin(p[i]);
1581                         }
1582                     }else if(p[1] == 'x' || p[1] == 'X'){
1583                         /* hexadecimal number */
1584                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1585                             unicode_subchar <<= 4;
1586                             unicode_subchar |= hex2bin(p[i]);
1587                         }
1588                     }else{
1589                         /* octal number */
1590                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1591                             unicode_subchar *= 8;
1592                             unicode_subchar += hex2bin(p[i]);
1593                         }
1594                     }
1595                     w16e_conv(unicode_subchar, &i, &j);
1596                     unicode_subchar = i<<8 | j;
1597                     continue;
1598                 }
1599 #endif
1600 #ifdef UTF8_OUTPUT_ENABLE
1601                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1602                     ms_ucs_map_f = UCS_MAP_MS;
1603                     continue;
1604                 }
1605 #endif
1606 #ifdef UNICODE_NORMALIZATION
1607                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1608                     input_f = UTF8_INPUT;
1609                     nfc_f = TRUE;
1610                     continue;
1611                 }
1612 #endif
1613                 if (strcmp(long_option[i].name, "prefix=") == 0){
1614                     if (nkf_isgraph(p[0])){
1615                         for (i = 1; nkf_isgraph(p[i]); i++){
1616                             prefix_table[p[i]] = p[0];
1617                         }
1618                     }
1619                     continue;
1620                 }
1621             }
1622             continue;
1623         case 'b':           /* buffered mode */
1624             unbuf_f = FALSE;
1625             continue;
1626         case 'u':           /* non bufferd mode */
1627             unbuf_f = TRUE;
1628             continue;
1629         case 't':           /* transparent mode */
1630             if (*cp=='1') {
1631                 /* alias of -t */
1632                 nop_f = TRUE;
1633                 *cp++;
1634             } else if (*cp=='2') {
1635                 /*
1636                  * -t with put/get
1637                  *
1638                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1639                  *
1640                  */
1641                 nop_f = 2;
1642                 *cp++;
1643             } else
1644                 nop_f = TRUE;
1645             continue;
1646         case 'j':           /* JIS output */
1647         case 'n':
1648             output_conv = j_oconv;
1649             continue;
1650         case 'e':           /* AT&T EUC output */
1651             output_conv = e_oconv;
1652             continue;
1653         case 's':           /* SJIS output */
1654             output_conv = s_oconv;
1655             continue;
1656         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1657             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1658             input_f = LATIN1_INPUT;
1659             continue;
1660         case 'i':           /* Kanji IN ESC-$-@/B */
1661             if (*cp=='@'||*cp=='B') 
1662                 kanji_intro = *cp++;
1663             continue;
1664         case 'o':           /* ASCII IN ESC-(-J/B */
1665             if (*cp=='J'||*cp=='B'||*cp=='H') 
1666                 ascii_intro = *cp++;
1667             continue;
1668         case 'h':
1669             /*  
1670                 bit:1   katakana->hiragana
1671                 bit:2   hiragana->katakana
1672             */
1673             if ('9'>= *cp && *cp>='0') 
1674                 hira_f |= (*cp++ -'0');
1675             else 
1676                 hira_f |= 1;
1677             continue;
1678         case 'r':
1679             rot_f = TRUE;
1680             continue;
1681 #if defined(MSDOS) || defined(__OS2__) 
1682         case 'T':
1683             binmode_f = FALSE;
1684             continue;
1685 #endif
1686 #ifndef PERL_XS
1687         case 'V':
1688             version();
1689             exit(1);
1690             break;
1691         case 'v':
1692             usage();
1693             exit(1);
1694             break;
1695 #endif
1696 #ifdef UTF8_OUTPUT_ENABLE
1697         case 'w':           /* UTF-8 output */
1698             if ('1'== cp[0] && '6'==cp[1]) {
1699                 output_conv = w_oconv16; cp+=2;
1700                 if (cp[0]=='L') {
1701                     unicode_bom_f=2; cp++;
1702                     w_oconv16_LE = 1;
1703                     if (cp[0] == '0'){
1704                         unicode_bom_f=1; cp++;
1705                     }
1706                 } else if (cp[0] == 'B') {
1707                     unicode_bom_f=2; cp++;
1708                     if (cp[0] == '0'){
1709                         unicode_bom_f=1; cp++;
1710                     }
1711                 } 
1712             } else if (cp[0] == '8') {
1713                 output_conv = w_oconv; cp++;
1714                 unicode_bom_f=2;
1715                 if (cp[0] == '0'){
1716                     unicode_bom_f=1; cp++;
1717                 }
1718             } else
1719                 output_conv = w_oconv;
1720             continue;
1721 #endif
1722 #ifdef UTF8_INPUT_ENABLE
1723         case 'W':           /* UTF-8 input */
1724             if ('1'== cp[0] && '6'==cp[1]) {
1725                 input_f = UTF16BE_INPUT;
1726                 utf16_mode = UTF16BE_INPUT;
1727                 cp += 2;
1728                 if (cp[0]=='L') {
1729                     cp++;
1730                     input_f = UTF16LE_INPUT;
1731                     utf16_mode = UTF16LE_INPUT;
1732                 } else if (cp[0] == 'B') {
1733                     cp++;
1734                     input_f = UTF16BE_INPUT;
1735                     utf16_mode = UTF16BE_INPUT;
1736                 }
1737             } else if (cp[0] == '8') {
1738                 cp++;
1739                 input_f = UTF8_INPUT;
1740             } else
1741                 input_f = UTF8_INPUT;
1742             continue;
1743 #endif
1744         /* Input code assumption */
1745         case 'J':   /* JIS input */
1746         case 'E':   /* AT&T EUC input */
1747             input_f = JIS_INPUT;
1748             continue;
1749         case 'S':   /* MS Kanji input */
1750             input_f = SJIS_INPUT;
1751             if (x0201_f==NO_X0201) x0201_f=TRUE;
1752             continue;
1753         case 'Z':   /* Convert X0208 alphabet to asii */
1754             /*  bit:0   Convert X0208
1755                 bit:1   Convert Kankaku to one space
1756                 bit:2   Convert Kankaku to two spaces
1757                 bit:3   Convert HTML Entity
1758             */
1759             if ('9'>= *cp && *cp>='0') 
1760                 alpha_f |= 1<<(*cp++ -'0');
1761             else 
1762                 alpha_f |= TRUE;
1763             continue;
1764         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1765             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1766             /* accept  X0201
1767                     ESC-(-I     in JIS, EUC, MS Kanji
1768                     SI/SO       in JIS, EUC, MS Kanji
1769                     SSO         in EUC, JIS, not in MS Kanji
1770                     MS Kanji (0xa0-0xdf) 
1771                output  X0201
1772                     ESC-(-I     in JIS (0x20-0x5f)
1773                     SSO         in EUC (0xa0-0xdf)
1774                     0xa0-0xd    in MS Kanji (0xa0-0xdf) 
1775             */
1776             continue;
1777         case 'X':   /* Assume X0201 kana */
1778             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1779             x0201_f = TRUE;
1780             continue;
1781         case 'F':   /* prserve new lines */
1782             fold_preserve_f = TRUE;
1783         case 'f':   /* folding -f60 or -f */
1784             fold_f = TRUE;
1785             fold_len = 0;
1786             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1787                 fold_len *= 10;
1788                 fold_len += *cp++ - '0';
1789             }
1790             if (!(0<fold_len && fold_len<BUFSIZ)) 
1791                 fold_len = DEFAULT_FOLD;
1792             if (*cp=='-') {
1793                 fold_margin = 0;
1794                 cp++;
1795                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1796                     fold_margin *= 10;
1797                     fold_margin += *cp++ - '0';
1798                 }
1799             }
1800             continue;
1801         case 'm':   /* MIME support */
1802             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1803             if (*cp=='B'||*cp=='Q') {
1804                 mime_decode_mode = *cp++;
1805                 mimebuf_f = FIXED_MIME;
1806             } else if (*cp=='N') {
1807                 mime_f = TRUE; cp++;
1808             } else if (*cp=='S') {
1809                 mime_f = STRICT_MIME; cp++;
1810             } else if (*cp=='0') {
1811                 mime_decode_f = FALSE;
1812                 mime_f = FALSE; cp++;
1813             }
1814             continue;
1815         case 'M':   /* MIME output */
1816             if (*cp=='B') {
1817                 mimeout_mode = 'B';
1818                 mimeout_f = FIXED_MIME; cp++;
1819             } else if (*cp=='Q') {
1820                 mimeout_mode = 'Q';
1821                 mimeout_f = FIXED_MIME; cp++;
1822             } else {
1823                 mimeout_f = TRUE;
1824             }
1825             continue;
1826         case 'B':   /* Broken JIS support */
1827             /*  bit:0   no ESC JIS
1828                 bit:1   allow any x on ESC-(-x or ESC-$-x
1829                 bit:2   reset to ascii on NL
1830             */
1831             if ('9'>= *cp && *cp>='0') 
1832                 broken_f |= 1<<(*cp++ -'0');
1833             else 
1834                 broken_f |= TRUE;
1835             continue;
1836 #ifndef PERL_XS
1837         case 'O':/* for Output file */
1838             file_out_f = TRUE;
1839             continue;
1840 #endif
1841         case 'c':/* add cr code */
1842             crmode_f = CRLF;
1843             continue;
1844         case 'd':/* delete cr code */
1845             crmode_f = NL;
1846             continue;
1847         case 'I':   /* ISO-2022-JP output */
1848             iso2022jp_f = TRUE;
1849             continue;
1850         case 'L':  /* line mode */
1851             if (*cp=='u') {         /* unix */
1852                 crmode_f = NL; cp++;
1853             } else if (*cp=='m') { /* mac */
1854                 crmode_f = CR; cp++;
1855             } else if (*cp=='w') { /* windows */
1856                 crmode_f = CRLF; cp++;
1857             } else if (*cp=='0') { /* no conversion  */
1858                 crmode_f = 0; cp++;
1859             }
1860             continue;
1861         case 'g':
1862 #ifndef PERL_XS
1863             guess_f = TRUE;
1864 #endif
1865             continue;
1866         case ' ':    
1867         /* module muliple options in a string are allowed for Perl moudle  */
1868             while(*cp && *cp++!='-');
1869             continue;
1870         default:
1871             /* bogus option but ignored */
1872             continue;
1873         }
1874     }
1875 }
1876
1877 #ifdef ANSI_C_PROTOTYPE
1878 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1879 #else
1880 struct input_code * find_inputcode_byfunc(iconv_func)
1881      nkf_char (*iconv_func)();
1882 #endif
1883 {
1884     if (iconv_func){
1885         struct input_code *p = input_code_list;
1886         while (p->name){
1887             if (iconv_func == p->iconv_func){
1888                 return p;
1889             }
1890             p++;
1891         }
1892     }
1893     return 0;
1894 }
1895
1896 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1897 {
1898 #ifdef INPUT_CODE_FIX
1899     if (f || !input_f)
1900 #endif
1901         if (estab_f != f){
1902             estab_f = f;
1903         }
1904
1905     if (iconv_func
1906 #ifdef INPUT_CODE_FIX
1907         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1908 #endif
1909         ){
1910         iconv = iconv_func;
1911     }
1912 #ifdef CHECK_OPTION
1913     if (estab_f && iconv_for_check != iconv){
1914         struct input_code *p = find_inputcode_byfunc(iconv);
1915         if (p){
1916             set_input_codename(p->name);
1917             debug(input_codename);
1918         }
1919         iconv_for_check = iconv;
1920     }
1921 #endif
1922 }
1923
/*
 * Penalty bits accumulated in input_code.score by the code detectors.
 * Each flag is the previous one shifted left by one bit, so a larger
 * value marks a stronger indication that the guess is wrong.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* character remapped via CP932 */
#define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* character that does not exist */
#else
#define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* character that does not exist */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1) /* error */

/* Score every detector starts from (see status_reset()). */
#define SCORE_INIT (SCORE_iMIME)
1937
1938 const nkf_char score_table_A0[] = {
1939     0, 0, 0, 0,
1940     0, 0, 0, 0,
1941     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1942     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1943 };
1944
1945 const nkf_char score_table_F0[] = {
1946     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1947     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1948     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1949     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1950 };
1951
1952 void set_code_score(struct input_code *ptr, nkf_char score)
1953 {
1954     if (ptr){
1955         ptr->score |= score;
1956     }
1957 }
1958
1959 void clr_code_score(struct input_code *ptr, nkf_char score)
1960 {
1961     if (ptr){
1962         ptr->score &= ~score;
1963     }
1964 }
1965
1966 void code_score(struct input_code *ptr)
1967 {
1968     nkf_char c2 = ptr->buf[0];
1969 #ifdef UTF8_OUTPUT_ENABLE
1970     nkf_char c1 = ptr->buf[1];
1971 #endif
1972     if (c2 < 0){
1973         set_code_score(ptr, SCORE_ERROR);
1974     }else if (c2 == SSO){
1975         set_code_score(ptr, SCORE_KANA);
1976 #ifdef UTF8_OUTPUT_ENABLE
1977     }else if (!e2w_conv(c2, c1)){
1978         set_code_score(ptr, SCORE_NO_EXIST);
1979 #endif
1980     }else if ((c2 & 0x70) == 0x20){
1981         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1982     }else if ((c2 & 0x70) == 0x70){
1983         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1984     }else if ((c2 & 0x70) >= 0x50){
1985         set_code_score(ptr, SCORE_L2);
1986     }
1987 }
1988
1989 void status_disable(struct input_code *ptr)
1990 {
1991     ptr->stat = -1;
1992     ptr->buf[0] = -1;
1993     code_score(ptr);
1994     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1995 }
1996
1997 void status_push_ch(struct input_code *ptr, nkf_char c)
1998 {
1999     ptr->buf[ptr->index++] = c;
2000 }
2001
2002 void status_clear(struct input_code *ptr)
2003 {
2004     ptr->stat = 0;
2005     ptr->index = 0;
2006 }
2007
2008 void status_reset(struct input_code *ptr)
2009 {
2010     status_clear(ptr);
2011     ptr->score = SCORE_INIT;
2012 }
2013
2014 void status_reinit(struct input_code *ptr)
2015 {
2016     status_reset(ptr);
2017     ptr->_file_stat = 0;
2018 }
2019
2020 void status_check(struct input_code *ptr, nkf_char c)
2021 {
2022     if (c <= DEL && estab_f){
2023         status_reset(ptr);
2024     }
2025 }
2026
2027 void s_status(struct input_code *ptr, nkf_char c)
2028 {
2029     switch(ptr->stat){
2030       case -1:
2031           status_check(ptr, c);
2032           break;
2033       case 0:
2034           if (c <= DEL){
2035               break;
2036 #ifdef NUMCHAR_OPTION
2037           }else if (is_unicode_capsule(c)){
2038               break;
2039 #endif
2040           }else if (0xa1 <= c && c <= 0xdf){
2041               status_push_ch(ptr, SSO);
2042               status_push_ch(ptr, c);
2043               code_score(ptr);
2044               status_clear(ptr);
2045           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2046               ptr->stat = 1;
2047               status_push_ch(ptr, c);
2048 #ifdef SHIFTJIS_CP932
2049           }else if (cp51932_f
2050                     && is_ibmext_in_sjis(c)){
2051               ptr->stat = 2;
2052               status_push_ch(ptr, c);
2053 #endif /* SHIFTJIS_CP932 */
2054 #ifdef X0212_ENABLE
2055           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2056               ptr->stat = 1;
2057               status_push_ch(ptr, c);
2058 #endif /* X0212_ENABLE */
2059           }else{
2060               status_disable(ptr);
2061           }
2062           break;
2063       case 1:
2064           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2065               status_push_ch(ptr, c);
2066               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2067               code_score(ptr);
2068               status_clear(ptr);
2069           }else{
2070               status_disable(ptr);
2071           }
2072           break;
2073       case 2:
2074 #ifdef SHIFTJIS_CP932
2075           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2076               status_push_ch(ptr, c);
2077               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2078                   set_code_score(ptr, SCORE_CP932);
2079                   status_clear(ptr);
2080                   break;
2081               }
2082           }
2083 #endif /* SHIFTJIS_CP932 */
2084 #ifndef X0212_ENABLE
2085           status_disable(ptr);
2086 #endif
2087           break;
2088     }
2089 }
2090
2091 void e_status(struct input_code *ptr, nkf_char c)
2092 {
2093     switch (ptr->stat){
2094       case -1:
2095           status_check(ptr, c);
2096           break;
2097       case 0:
2098           if (c <= DEL){
2099               break;
2100 #ifdef NUMCHAR_OPTION
2101           }else if (is_unicode_capsule(c)){
2102               break;
2103 #endif
2104           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2105               ptr->stat = 1;
2106               status_push_ch(ptr, c);
2107 #ifdef X0212_ENABLE
2108           }else if (0x8f == c){
2109               ptr->stat = 2;
2110               status_push_ch(ptr, c);
2111 #endif /* X0212_ENABLE */
2112           }else{
2113               status_disable(ptr);
2114           }
2115           break;
2116       case 1:
2117           if (0xa1 <= c && c <= 0xfe){
2118               status_push_ch(ptr, c);
2119               code_score(ptr);
2120               status_clear(ptr);
2121           }else{
2122               status_disable(ptr);
2123           }
2124           break;
2125 #ifdef X0212_ENABLE
2126       case 2:
2127           if (0xa1 <= c && c <= 0xfe){
2128               ptr->stat = 1;
2129               status_push_ch(ptr, c);
2130           }else{
2131               status_disable(ptr);
2132           }
2133 #endif /* X0212_ENABLE */
2134     }
2135 }
2136
2137 #ifdef UTF8_INPUT_ENABLE
2138 void w16_status(struct input_code *ptr, nkf_char c)
2139 {
2140     switch (ptr->stat){
2141       case -1:
2142           break;
2143       case 0:
2144           if (ptr->_file_stat == 0){
2145               if (c == 0xfe || c == 0xff){
2146                   ptr->stat = c;
2147                   status_push_ch(ptr, c);
2148                   ptr->_file_stat = 1;
2149               }else{
2150                   status_disable(ptr);
2151                   ptr->_file_stat = -1;
2152               }
2153           }else if (ptr->_file_stat > 0){
2154               ptr->stat = 1;
2155               status_push_ch(ptr, c);
2156           }else if (ptr->_file_stat < 0){
2157               status_disable(ptr);
2158           }
2159           break;
2160
2161       case 1:
2162           if (c == EOF){
2163               status_disable(ptr);
2164               ptr->_file_stat = -1;
2165           }else{
2166               status_push_ch(ptr, c);
2167               status_clear(ptr);
2168           }
2169           break;
2170
2171       case 0xfe:
2172       case 0xff:
2173           if (ptr->stat != c && (c == 0xfe || c == 0xff)){
2174               status_push_ch(ptr, c);
2175               status_clear(ptr);
2176           }else{
2177               status_disable(ptr);
2178               ptr->_file_stat = -1;
2179           }
2180           break;
2181     }
2182 }
2183
2184 void w_status(struct input_code *ptr, nkf_char c)
2185 {
2186     switch (ptr->stat){
2187       case -1:
2188           status_check(ptr, c);
2189           break;
2190       case 0:
2191           if (c <= DEL){
2192               break;
2193 #ifdef NUMCHAR_OPTION
2194           }else if (is_unicode_capsule(c)){
2195               break;
2196 #endif
2197           }else if (0xc0 <= c && c <= 0xdf){
2198               ptr->stat = 1;
2199               status_push_ch(ptr, c);
2200           }else if (0xe0 <= c && c <= 0xef){
2201               ptr->stat = 2;
2202               status_push_ch(ptr, c);
2203           }else{
2204               status_disable(ptr);
2205           }
2206           break;
2207       case 1:
2208       case 2:
2209           if (0x80 <= c && c <= 0xbf){
2210               status_push_ch(ptr, c);
2211               if (ptr->index > ptr->stat){
2212                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2213                              && ptr->buf[2] == 0xbf);
2214                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2215                            &ptr->buf[0], &ptr->buf[1]);
2216                   if (!bom){
2217                       code_score(ptr);
2218                   }
2219                   status_clear(ptr);
2220               }
2221           }else{
2222               status_disable(ptr);
2223           }
2224           break;
2225     }
2226 }
2227 #endif
2228
2229 void code_status(nkf_char c)
2230 {
2231     int action_flag = 1;
2232     struct input_code *result = 0;
2233     struct input_code *p = input_code_list;
2234     while (p->name){
2235         (p->status_func)(p, c);
2236         if (p->stat > 0){
2237             action_flag = 0;
2238         }else if(p->stat == 0){
2239             if (result){
2240                 action_flag = 0;
2241             }else{
2242                 result = p;
2243             }
2244         }
2245         ++p;
2246     }
2247
2248     if (action_flag){
2249         if (result && !estab_f){
2250             set_iconv(TRUE, result->iconv_func);
2251         }else if (c <= DEL){
2252             struct input_code *ptr = input_code_list;
2253             while (ptr->name){
2254                 status_reset(ptr);
2255                 ++ptr;
2256             }
2257         }
2258     }
2259 }
2260
2261 #ifndef WIN32DLL
2262 nkf_char std_getc(FILE *f)
2263 {
2264     if (std_gc_ndx){
2265         return std_gc_buf[--std_gc_ndx];
2266     }
2267     return getc(f);
2268 }
2269 #endif /*WIN32DLL*/
2270
2271 nkf_char std_ungetc(nkf_char c, FILE *f)
2272 {
2273     if (std_gc_ndx == STD_GC_BUFSIZE){
2274         return EOF;
2275     }
2276     std_gc_buf[std_gc_ndx++] = c;
2277     return c;
2278 }
2279
2280 #ifndef WIN32DLL
2281 void std_putc(nkf_char c)
2282 {
2283     if(c!=EOF)
2284       putchar(c);
2285 }
2286 #endif /*WIN32DLL*/
2287
2288 #if !defined(PERL_XS) && !defined(WIN32DLL)
2289 nkf_char noconvert(FILE *f)
2290 {
2291     nkf_char    c;
2292
2293     if (nop_f == 2)
2294         module_connection();
2295     while ((c = (*i_getc)(f)) != EOF)
2296       (*o_putc)(c);
2297     (*o_putc)(EOF);
2298     return 1;
2299 }
2300 #endif
2301
/*
 * Wire up the output and input filter chains according to the option
 * flags, select the initial input converter, and re-initialise every
 * code detector.
 *
 * Each enabled stage saves the current head of its chain in its own
 * pointer (o_xxx / i_xxx) and then becomes the new head, so the stage
 * installed last is the one called first.
 */
void module_connection(void)
{
    /* the chain starts with the selected output converter and putc */
    oconv = output_conv; 
    o_putc = std_putc;

    /* replace continuation module, from output side */

    /* output redirection */
#ifdef CHECK_OPTION
    if (noout_f || guess_f){
        o_putc = no_putc;
    }
#endif
    if (mimeout_f) {
        o_mputc = o_putc;
        o_putc = mime_putc;
        if (mimeout_f == TRUE) {
            o_base64conv = oconv; oconv = base64_conv;
        }
        /* base64_count = 0; */
    }

    if (crmode_f) {
        o_crconv = oconv; oconv = cr_conv;
    }
    if (rot_f) {
        o_rot_conv = oconv; oconv = rot_conv;
    }
    if (iso2022jp_f) {
        o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
    }
    if (hira_f) {
        o_hira_conv = oconv; oconv = hira_conv;
    }
    if (fold_f) {
        o_fconv = oconv; oconv = fold_conv;
        f_line = 0;
    }
    if (alpha_f || x0201_f) {
        o_zconv = oconv; oconv = z_conv;
    }

    /* the input chain starts with the plain getc/ungetc pair */
    i_getc = std_getc;
    i_ungetc = std_ungetc;
    /* input redirection */
#ifdef INPUT_OPTION
    if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
        i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
    }
    if (url_f){
        i_ugetc = i_getc; i_getc = url_getc;
        i_uungetc = i_ungetc; i_ungetc= url_ungetc;
    }
#endif
#ifdef NUMCHAR_OPTION
    if (numchar_f){
        i_ngetc = i_getc; i_getc = numchar_getc;
        i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
    }
#endif
#ifdef UNICODE_NORMALIZATION
    if (nfc_f && input_f == UTF8_INPUT){
        i_nfc_getc = i_getc; i_getc = nfc_getc;
        i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
    }
#endif
    if (mime_f && mimebuf_f==FIXED_MIME) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
    }
    if (broken_f & 1) {
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
    }
    /*
     * Select the input converter.  An explicitly requested input code
     * is forced with -TRUE; otherwise e_iconv is installed with FALSE,
     * i.e. without marking the code as established.
     */
    if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
        set_iconv(-TRUE, e_iconv);
    } else if (input_f == SJIS_INPUT) {
        set_iconv(-TRUE, s_iconv);
#ifdef UTF8_INPUT_ENABLE
    } else if (input_f == UTF8_INPUT) {
        set_iconv(-TRUE, w_iconv);
    } else if (input_f == UTF16BE_INPUT) {
        set_iconv(-TRUE, w_iconv16);
    } else if (input_f == UTF16LE_INPUT) {
        /* both UTF-16 byte orders use the same converter */
        set_iconv(-TRUE, w_iconv16);
#endif
    } else {
        set_iconv(FALSE, e_iconv);
    }

    /* start every detector in input_code_list from a clean state */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
}
2400
2401 /*
2402    Conversion main loop. Code detection only. 
2403  */
2404
2405 nkf_char kanji_convert(FILE *f)
2406 {
2407     nkf_char    c1,
2408                     c2, c3;
2409     int is_8bit = FALSE;
2410
2411     module_connection();
2412     c2 = 0;
2413
2414     if(input_f == SJIS_INPUT
2415 #ifdef UTF8_INPUT_ENABLE
2416        || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT || input_f == UTF16LE_INPUT
2417 #endif
2418       ){
2419         is_8bit = TRUE;
2420     }
2421
2422
2423     input_mode = ASCII;
2424     output_mode = ASCII;
2425     shift_mode = FALSE;
2426
2427 #define NEXT continue      /* no output, get next */
2428 #define SEND ;             /* output c1 and c2, get next */
2429 #define LAST break         /* end of loop, go closing  */
2430
2431     while ((c1 = (*i_getc)(f)) != EOF) {
2432 #ifdef INPUT_CODE_FIX
2433         if (!input_f)
2434 #endif
2435             code_status(c1);
2436         if (c2) {
2437             /* second byte */
2438             if (c2 > DEL) {
2439                 /* in case of 8th bit is on */
2440                 if (!estab_f&&!mime_decode_mode) {
2441                     /* in case of not established yet */
2442                     /* It is still ambiguious */
2443                     if (h_conv(f, c2, c1)==EOF) 
2444                         LAST;
2445                     else 
2446                         c2 = 0;
2447                     NEXT;
2448                 } else
2449                     /* in case of already established */
2450                     if (c1 < AT) {
2451                         /* ignore bogus code */
2452                         c2 = 0;
2453                         NEXT;
2454                     } else
2455                         SEND;
2456             } else
2457                 /* second byte, 7 bit code */
2458                 /* it might be kanji shitfted */
2459                 if ((c1 == DEL) || (c1 <= SPACE)) {
2460                     /* ignore bogus first code */
2461                     c2 = 0;
2462                     NEXT;
2463                 } else
2464                     SEND;
2465         } else {
2466             /* first byte */
2467             if (
2468 #ifdef UTF8_INPUT_ENABLE
2469                 iconv == w_iconv16
2470 #else
2471                 0
2472 #endif
2473                 ) {
2474                 c2 = c1;
2475                 c1 = (*i_getc)(f);
2476                 SEND;
2477 #ifdef NUMCHAR_OPTION
2478             } else if (is_unicode_capsule(c1)){
2479                 SEND;
2480 #endif
2481             } else if (c1 > DEL) {
2482                 /* 8 bit code */
2483                 if (!estab_f && !iso8859_f) {
2484                     /* not established yet */
2485                     if (!is_8bit) is_8bit = TRUE;
2486                     c2 = c1;
2487                     NEXT;
2488                 } else { /* estab_f==TRUE */
2489                     if (iso8859_f) {
2490                         c2 = ISO8859_1;
2491                         c1 &= 0x7f;
2492                         SEND;
2493                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2494                         /* SJIS X0201 Case... */
2495                         if(iso2022jp_f && x0201_f==NO_X0201) {
2496                             (*oconv)(GETA1, GETA2);
2497                             NEXT;
2498                         } else {
2499                             c2 = X0201;
2500                             c1 &= 0x7f;
2501                             SEND;
2502                         }
2503                     } else if (c1==SSO && iconv != s_iconv) {
2504                         /* EUC X0201 Case */
2505                         c1 = (*i_getc)(f);  /* skip SSO */
2506                         code_status(c1);
2507                         if (SSP<=c1 && c1<0xe0) {
2508                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2509                                 (*oconv)(GETA1, GETA2);
2510                                 NEXT;
2511                             } else {
2512                                 c2 = X0201;
2513                                 c1 &= 0x7f;
2514                                 SEND;
2515                             }
2516                         } else  { /* bogus code, skip SSO and one byte */
2517                             NEXT;
2518                         }
2519                     } else {
2520                        /* already established */
2521                        c2 = c1;
2522                        NEXT;
2523                     }
2524                 }
2525             } else if ((c1 > SPACE) && (c1 != DEL)) {
2526                 /* in case of Roman characters */
2527                 if (shift_mode) { 
2528                     /* output 1 shifted byte */
2529                     if (iso8859_f) {
2530                         c2 = ISO8859_1;
2531                         SEND;
2532                     } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2533                       /* output 1 shifted byte */
2534                         if(iso2022jp_f && x0201_f==NO_X0201) {
2535                             (*oconv)(GETA1, GETA2);
2536                             NEXT;
2537                         } else {
2538                             c2 = X0201;
2539                             SEND;
2540                         }
2541                     } else {
2542                         /* look like bogus code */
2543                         NEXT;
2544                     }
2545                 } else if (input_mode == X0208 || input_mode == X0212 ||
2546                            input_mode == X0213_1 || input_mode == X0213_2) {
2547                     /* in case of Kanji shifted */
2548                     c2 = c1;
2549                     NEXT;
2550                 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2551                     /* Check MIME code */
2552                     if ((c1 = (*i_getc)(f)) == EOF) {
2553                         (*oconv)(0, '=');
2554                         LAST;
2555                     } else if (c1 == '?') {
2556                         /* =? is mime conversion start sequence */
2557                         if(mime_f == STRICT_MIME) {
2558                             /* check in real detail */
2559                             if (mime_begin_strict(f) == EOF) 
2560                                 LAST;
2561                             else
2562                                 NEXT;
2563                         } else if (mime_begin(f) == EOF) 
2564                             LAST;
2565                         else
2566                             NEXT;
2567                     } else {
2568                         (*oconv)(0, '=');
2569                         (*i_ungetc)(c1,f);
2570                         NEXT;
2571                     }
2572                 } else {
2573                     /* normal ASCII code */ 
2574                     SEND;
2575                 }
2576             } else if (!is_8bit && c1 == SI) {
2577                 shift_mode = FALSE; 
2578                 NEXT;
2579             } else if (!is_8bit && c1 == SO) {
2580                 shift_mode = TRUE; 
2581                 NEXT;
2582             } else if (!is_8bit && c1 == ESC ) {
2583                 if ((c1 = (*i_getc)(f)) == EOF) {
2584                     /*  (*oconv)(0, ESC); don't send bogus code */
2585                     LAST;
2586                 } else if (c1 == '$') {
2587                     if ((c1 = (*i_getc)(f)) == EOF) {
2588                         /*
2589                         (*oconv)(0, ESC); don't send bogus code 
2590                         (*oconv)(0, '$'); */
2591                         LAST;
2592                     } else if (c1 == '@'|| c1 == 'B') {
2593                         /* This is kanji introduction */
2594                         input_mode = X0208;
2595                         shift_mode = FALSE;
2596                         set_input_codename("ISO-2022-JP");
2597 #ifdef CHECK_OPTION
2598                         debug(input_codename);
2599 #endif
2600                         NEXT;
2601                     } else if (c1 == '(') {
2602                         if ((c1 = (*i_getc)(f)) == EOF) {
2603                             /* don't send bogus code 
2604                             (*oconv)(0, ESC);
2605                             (*oconv)(0, '$');
2606                             (*oconv)(0, '(');
2607                                 */
2608                             LAST;
2609                         } else if (c1 == '@'|| c1 == 'B') {
2610                             /* This is kanji introduction */
2611                             input_mode = X0208;
2612                             shift_mode = FALSE;
2613                             NEXT;
2614 #ifdef X0212_ENABLE
2615                         } else if (c1 == 'D'){
2616                             input_mode = X0212;
2617                             shift_mode = FALSE;
2618                             NEXT;
2619 #endif /* X0212_ENABLE */
2620                         } else if (c1 == (X0213_1&0x7F)){
2621                             input_mode = X0213_1;
2622                             shift_mode = FALSE;
2623                             NEXT;
2624                         } else if (c1 == (X0213_2&0x7F)){
2625                             input_mode = X0213_2;
2626                             shift_mode = FALSE;
2627                             NEXT;
2628                         } else {
2629                             /* could be some special code */
2630                             (*oconv)(0, ESC);
2631                             (*oconv)(0, '$');
2632                             (*oconv)(0, '(');
2633                             (*oconv)(0, c1);
2634                             NEXT;
2635                         }
2636                     } else if (broken_f&0x2) {
2637                         /* accept any ESC-(-x as broken code ... */
2638                         input_mode = X0208;
2639                         shift_mode = FALSE;
2640                         NEXT;
2641                     } else {
2642                         (*oconv)(0, ESC);
2643                         (*oconv)(0, '$');
2644                         (*oconv)(0, c1);
2645                         NEXT;
2646                     }
2647                 } else if (c1 == '(') {
2648                     if ((c1 = (*i_getc)(f)) == EOF) {
2649                         /* don't send bogus code 
2650                         (*oconv)(0, ESC);
2651                         (*oconv)(0, '('); */
2652                         LAST;
2653                     } else {
2654                         if (c1 == 'I') {
2655                             /* This is X0201 kana introduction */
2656                             input_mode = X0201; shift_mode = X0201;
2657                             NEXT;
2658                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2659                             /* This is X0208 kanji introduction */
2660                             input_mode = ASCII; shift_mode = FALSE;
2661                             NEXT;
2662                         } else if (broken_f&0x2) {
2663                             input_mode = ASCII; shift_mode = FALSE;
2664                             NEXT;
2665                         } else {
2666                             (*oconv)(0, ESC);
2667                             (*oconv)(0, '(');
2668                             /* maintain various input_mode here */
2669                             SEND;
2670                         }
2671                     }
2672                } else if ( c1 == 'N' || c1 == 'n' ){
2673                    /* SS2 */
2674                    c3 = (*i_getc)(f);  /* skip SS2 */
2675                    if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2676                        c1 = c3;
2677                        c2 = X0201;
2678                        SEND;
2679                    }else{
2680                        (*i_ungetc)(c3, f);
2681                        /* lonely ESC  */
2682                        (*oconv)(0, ESC);
2683                        SEND;
2684                    }
2685                 } else {
2686                     /* lonely ESC  */
2687                     (*oconv)(0, ESC);
2688                     SEND;
2689                 }
2690             } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2691                 input_mode = ASCII; set_iconv(FALSE, 0);
2692                 SEND;
2693             } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2694                 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2695                     i_ungetc(SPACE,f);
2696                     continue;
2697                 } else {
2698                     i_ungetc(c1,f);
2699                 }
2700                 c1 = NL;
2701                 SEND;
2702             } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2703                 if ((c1=(*i_getc)(f))!=EOF) {
2704                     if (c1==SPACE) {
2705                         i_ungetc(SPACE,f);
2706                         continue;
2707                     } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2708                         i_ungetc(SPACE,f);
2709                         continue;
2710                     } else {
2711                         i_ungetc(c1,f);
2712                     }
2713                     i_ungetc(NL,f);
2714                 } else {
2715                     i_ungetc(c1,f);
2716                 }
2717                 c1 = CR;
2718                 SEND;
2719             } else 
2720                 SEND;
2721         }
2722         /* send: */
2723         switch(input_mode){
2724         case ASCII:
2725             if ((*iconv)(c2, c1, 0) < 0){  /* can be EUC/SJIS */
2726                 nkf_char c0 = (*i_getc)(f);
2727                 if (c0 != EOF){
2728                     code_status(c0);
2729                     (*iconv)(c2, c1, c0);
2730                 }
2731             }
2732             break;
2733         case X0208:
2734         case X0213_1:
2735             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2736             break;
2737 #ifdef X0212_ENABLE
2738         case X0212:
2739             (*oconv)(PREFIX_EUCG3 | c2, c1);
2740             break;
2741 #endif /* X0212_ENABLE */
2742         case X0213_2:
2743             (*oconv)(PREFIX_EUCG3 | c2, c1);
2744             break;
2745         default:
2746             (*oconv)(input_mode, c1);  /* other special case */
2747         }
2748
2749         c2 = 0;
2750         continue;
2751         /* goto next_word */
2752     }
2753
2754     /* epilogue */
2755     (*iconv)(EOF, 0, 0);
2756     if (!is_inputcode_set)
2757     {
2758         if (is_8bit) {
2759             struct input_code *p = input_code_list;
2760             struct input_code *result = p;
2761             while (p->name){
2762                 if (p->score < result->score) result = p;
2763                 ++p;
2764             }
2765             set_input_codename(result->name);
2766         }
2767     }
2768     return 1;
2769 }
2770
/*
 * h_conv: hold input bytes while the input encoding is still undecided,
 * then replay them through the selected input converter.
 *
 * f   input stream, read through i_getc / i_ungetc
 * c2  first byte of the pair that triggered holding
 * c1  second byte of that pair
 *
 * Returns the last value read by the look-ahead loop (EOF when the input
 * ended there), or EOF when the input ran out while completing a
 * character during the replay.
 */
nkf_char
h_conv(FILE *f, nkf_char c2, nkf_char c1)
{
    nkf_char    wc,c3;


    /** it must NOT be in the kanji shift sequence       */
    /** it must NOT be written in JIS7                   */
    /** and it must be after 2 byte 8bit code            */

    /* restart the hold buffer with the two bytes handed in by the caller */
    hold_count = 0;
    push_hold_buf(c2);
    push_hold_buf(c1);

    /* look ahead: feed every byte to code_status() and keep a copy of it,
     * until EOF, an ESC (pushed back to the stream), a full hold buffer,
     * or estab_f becoming set */
    while ((c1 = (*i_getc)(f)) != EOF) {
        if (c1 == ESC){
            (*i_ungetc)(c1,f);
            break;
        }
        code_status(c1);
        if (push_hold_buf(c1) == EOF || estab_f){
            break;
        }
    }

    if (!estab_f){
        /* nothing established: take the input_code_list entry with the
         * lowest score and install its converter */
        struct input_code *p = input_code_list;
        struct input_code *result = p;
        if (c1 == EOF){
            code_status(c1);
        }
        while (p->name){
            if (p->score < result->score){
                result = p;
            }
            ++p;
        }
        set_iconv(FALSE, result->iconv_func);
    }


    /** now,
     ** 1) EOF is detected, or
     ** 2) Code is established, or
     ** 3) Buffer is FULL (but last word is pushed), or
     ** 4) an ESC was seen and pushed back
     **
     ** in 1) and 3) cases, we continue to use
     ** Kanji codes by oconv and leave estab_f unchanged.
     **/

    /* replay the held bytes through the (now selected) iconv */
    c3=c1;
    wc = 0;
    while (wc < hold_count){
        c2 = hold_buf[wc++];
        if (c2 <= DEL
#ifdef NUMCHAR_OPTION
            || is_unicode_capsule(c2)
#endif
            ){
            /* single-byte value: sent on its own */
            (*iconv)(0, c2, 0);
            continue;
        }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
            /* with the Shift_JIS converter, 0xa1..0xdf is sent as X0201 */
            (*iconv)(X0201, c2, 0);
            continue;
        }
        /* need a second byte: from the hold buffer if any is left,
         * otherwise straight from the stream */
        if (wc < hold_count){
            c1 = hold_buf[wc++];
        }else{
            c1 = (*i_getc)(f);
            if (c1 == EOF){
                c3 = EOF;
                break;
            }
            code_status(c1);
        }
        if ((*iconv)(c2, c1, 0) < 0){
            /* a negative result means the converter wants a third byte */
            nkf_char c0;
            if (wc < hold_count){
                c0 = hold_buf[wc++];
            }else{
                c0 = (*i_getc)(f);
                if (c0 == EOF){
                    c3 = EOF;
                    break;
                }
                code_status(c0);
            }
            (*iconv)(c2, c1, c0);
        }
    }
    return c3;
}
2863
2864
2865
2866 nkf_char
2867 push_hold_buf(nkf_char c2)
2868 {
2869     if (hold_count >= HOLD_SIZE*2)
2870         return (EOF);
2871     hold_buf[hold_count++] = (unsigned char)c2;
2872     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2873 }
2874
2875 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
2876 {
2877 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2878     nkf_char val;
2879 #endif
2880     static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
2881 #ifdef SHIFTJIS_CP932
2882     if (cp51932_f && is_ibmext_in_sjis(c2)){
2883 #if 0
2884         extern const unsigned short shiftjis_cp932[3][189];
2885 #endif
2886         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2887         if (val){
2888             c2 = val >> 8;
2889             c1 = val & 0xff;
2890         }
2891     }
2892 #endif /* SHIFTJIS_CP932 */
2893 #ifdef X0212_ENABLE
2894     if (!x0213_f && is_ibmext_in_sjis(c2)){
2895 #if 0
2896         extern const unsigned short shiftjis_x0212[3][189];
2897 #endif
2898         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2899         if (val){
2900             if (val > 0x7FFF){
2901                 c2 = PREFIX_EUCG3 | (val >> 8);
2902                 c1 = val & 0xff;
2903             }else{
2904                 c2 = val >> 8;
2905                 c1 = val & 0xff;
2906             }
2907             if (p2) *p2 = c2;
2908             if (p1) *p1 = c1;
2909             return 0;
2910         }
2911     }
2912 #endif
2913     if(c2 >= 0x80){
2914         if(x0213_f && c2 >= 0xF0){
2915             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
2916                 c2 = PREFIX_EUCG3 | 0x20 + shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
2917             }else{ /* 78<=k<=94 */
2918                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
2919                 if (0x9E < c1) c2++;
2920             }
2921         }else{
2922             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
2923             if (0x9E < c1) c2++;
2924         }
2925         if (c1 < 0x9F)
2926             c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
2927         else {
2928             c1 = c1 - 0x7E;
2929         }
2930     }
2931
2932 #ifdef X0212_ENABLE
2933     c2 = x0212_unshift(c2);
2934 #endif
2935     if (p2) *p2 = c2;
2936     if (p1) *p1 = c1;
2937     return 0;
2938 }
2939
2940 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2941 {
2942     if (c2 == X0201) {
2943         c1 &= 0x7f;
2944     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2945         /* NOP */
2946     } else {
2947         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2948         if (ret) return ret;
2949     }
2950     (*oconv)(c2, c1);
2951     return 0;
2952 }
2953
/*
 * e_iconv: EUC-JP input converter.
 *
 * c2/c1 are the first two bytes of the character, c0 the optional third
 * byte.  The converted pair is passed to oconv.  Returns 0 after
 * output, or -1 (nothing output) when c2 is 0x8f and no third byte has
 * been supplied yet (c0 == 0).
 */
nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
{
    if (c2 == X0201) {
        /* already tagged as X0201: just drop bit 7 */
        c1 &= 0x7f;
#ifdef X0212_ENABLE
    }else if (c2 == 0x8f){
        /* three-byte sequence introduced by 0x8f */
        if (c0 == 0){
            return -1;
        }
        /* fold the 0x8f prefix and the 7-bit first byte into c2 */
        c2 = (c2 << 8) | (c1 & 0x7f);
        c1 = c0 & 0x7f;
#ifdef SHIFTJIS_CP932
        if (cp51932_f){
            /* round-trip through Shift_JIS; when the result no longer
             * carries a prefix (c2 < 0x100) reduce both bytes to 7 bits */
            nkf_char s2, s1;
            if (e2s_conv(c2, c1, &s2, &s1) == 0){
                s2e_conv(s2, s1, &c2, &c1);
                if (c2 < 0x100){
                    c1 &= 0x7f;
                    c2 &= 0x7f;
                }
            }
        }
#endif /* SHIFTJIS_CP932 */
#endif /* X0212_ENABLE */
    } else if (c2 == SSO){
        /* SSO-prefixed byte is reported as X0201 */
        c2 = X0201;
        c1 &= 0x7f;
    } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
        /* NOP */
    } else {
        /* ordinary two-byte character: strip bit 7 from both bytes */
        c1 &= 0x7f;
        c2 &= 0x7f;
    }
    (*oconv)(c2, c1);
    return 0;
}
2990
2991 #ifdef UTF8_INPUT_ENABLE
2992 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2993 {
2994     nkf_char ret = 0;
2995
2996     if (!c1){
2997         *p2 = 0;
2998         *p1 = c2;
2999     }else if (0xc0 <= c2 && c2 <= 0xef) {
3000         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3001 #ifdef NUMCHAR_OPTION
3002         if (ret > 0){
3003             if (p2) *p2 = 0;
3004             if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
3005             ret = 0;
3006         }
3007 #endif
3008     }
3009     return ret;
3010 }
3011
3012 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3013 {
3014     nkf_char ret = 0;
3015     
3016     /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3017     if(ignore_zwnbsp_f){
3018         ignore_zwnbsp_f = FALSE;
3019         if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
3020             return 0;
3021     }
3022     
3023     if (c2 == 0) /* 0x00-0x7f */
3024         c1 &= 0x7F; /* 1byte */
3025     else if (c0 == 0){
3026         if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
3027             /* 2ytes */
3028             if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
3029         }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
3030             return -1; /* 3bytes */
3031 #ifdef __COMMENT__
3032         else if (0xf0 <= c2)
3033             return 0; /* 4,5,6bytes */
3034         else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
3035             return 0; /* trail byte */
3036 #endif
3037         else return 0;
3038     }else{
3039         /* must be 3bytes */
3040         if(c2 == 0xE0){
3041             if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
3042                 return 0;
3043         }else if(c2 == 0xED){
3044             if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
3045                 return 0;
3046         }else if((c2 & 0xf0) == 0xe0){
3047             if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
3048                 return 0;
3049         }else return 0;
3050     }
3051     if (c2 == 0 || c2 == EOF){
3052     } else {
3053         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3054     }
3055     if (ret == 0){
3056         (*oconv)(c2, c1);
3057     }
3058     return ret;
3059 }
3060 #endif
3061
3062 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3063 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3064 {
3065     val &= VALUE_MASK;
3066     if (val < 0x80){
3067         *p2 = val;
3068         *p1 = 0;
3069         *p0 = 0;
3070     }else if (val < 0x800){
3071         *p2 = 0xc0 | (val >> 6);
3072         *p1 = 0x80 | (val & 0x3f);
3073         *p0 = 0;
3074     } else if (val <= NKF_INT32_C(0xFFFF)) {
3075         *p2 = 0xe0 | (val >> 12);
3076         *p1 = 0x80 | ((val >> 6) & 0x3f);
3077         *p0 = 0x80 | (val        & 0x3f);
3078     } else {
3079         *p2 = 0;
3080         *p1 = 0;
3081         *p0 = 0;
3082     }
3083 }
3084 #endif
3085
3086 #ifdef UTF8_INPUT_ENABLE
3087 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3088 {
3089     nkf_char val;
3090     if (c2 >= 0xf0){
3091         val = -1;
3092     }else if (c2 >= 0xe0){
3093         val = (c2 & 0x0f) << 12;
3094         val |= (c1 & 0x3f) << 6;
3095         val |= (c0 & 0x3f);
3096     }else if (c2 >= 0xc0){
3097         val = (c2 & 0x1f) << 6;
3098         val |= (c1 & 0x3f);
3099     }else{
3100         val = c2;
3101     }
3102     return val;
3103 }
3104
3105 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3106 {
3107     nkf_char c2, c1, c0;
3108     nkf_char ret = 0;
3109     val &= VALUE_MASK;
3110     if (val < 0x80){
3111         *p2 = 0;
3112         *p1 = val;
3113     }else{
3114         w16w_conv(val, &c2, &c1, &c0);
3115         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3116 #ifdef NUMCHAR_OPTION
3117         if (ret > 0){
3118             *p2 = 0;
3119             *p1 = CLASS_UTF16 | val;
3120             ret = 0;
3121         }
3122 #endif
3123     }
3124     return ret;
3125 }
3126 #endif
3127
3128 #ifdef UTF8_INPUT_ENABLE
3129 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3130 {
3131     nkf_char ret;
3132
3133     /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3134     if(ignore_zwnbsp_f){
3135         ignore_zwnbsp_f = FALSE;
3136         if (c2==0376 && c1==0377){
3137             utf16_mode = UTF16BE_INPUT;
3138             return 0;
3139         }else if(c2==0377 && c1==0376){
3140             utf16_mode = UTF16LE_INPUT;
3141             return 0;
3142         }
3143     }
3144     if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
3145         nkf_char tmp;
3146         tmp=c1; c1=c2; c2=tmp;
3147     }
3148     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3149         (*oconv)(c2, c1);
3150         return 0;
3151     }else if((c2>>3)==27){ /* surrogate pair */
3152         return 1;
3153     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3154     if (ret) return ret;
3155     (*oconv)(c2, c1);
3156     return 0;
3157 }
3158
3159 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3160 {
3161 #if 0
3162     extern const unsigned short *const utf8_to_euc_2bytes[];
3163     extern const unsigned short *const utf8_to_euc_2bytes_ms[];
3164     extern const unsigned short *const utf8_to_euc_2bytes_932[];
3165     extern const unsigned short *const *const utf8_to_euc_3bytes[];
3166     extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
3167     extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
3168 #endif
3169     const unsigned short *const *pp;
3170     const unsigned short *const *const *ppp;
3171     static const int no_best_fit_chars_table_C2[] =
3172     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3173         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3174         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3175         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3176     static const int no_best_fit_chars_table_C2_ms[] =
3177     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3178         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3179         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3180         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3181     static const int no_best_fit_chars_table_932_C2[] =
3182     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3183         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3184         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3185         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3186     static const int no_best_fit_chars_table_932_C3[] =
3187     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3188         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3189         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3190         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3191     nkf_char ret = 0;
3192
3193     if(c2 < 0x80){
3194         *p2 = 0;
3195         *p1 = c2;
3196     }else if(c2 < 0xe0){
3197         if(no_best_fit_chars_f){
3198             if(ms_ucs_map_f == UCS_MAP_CP932){
3199                 switch(c2){
3200                 case 0xC2:
3201                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3202                     break;
3203                 case 0xC3:
3204                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3205                     break;
3206                 }
3207             }else if(cp51932_f){
3208                 switch(c2){
3209                 case 0xC2:
3210                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3211                     break;
3212                 case 0xC3:
3213                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3214                     break;
3215                 }
3216             }else if(ms_ucs_map_f == UCS_MAP_MS){
3217                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3218             }
3219         }
3220         pp =
3221             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3222             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3223             utf8_to_euc_2bytes;
3224         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3225     }else if(c0 < 0xF0){
3226         if(no_best_fit_chars_f){
3227             if(ms_ucs_map_f == UCS_MAP_CP932){
3228                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3229             }else if(ms_ucs_map_f == UCS_MAP_MS){
3230                 switch(c2){
3231                 case 0xE2:
3232                     switch(c1){
3233                     case 0x80:
3234                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3235                         break;
3236                     case 0x88:
3237                         if(c0 == 0x92) return 1;
3238                         break;
3239                     }
3240                     break;
3241                 case 0xE3:
3242                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3243                     break;
3244                 }
3245             }else{
3246                 switch(c2){
3247                 case 0xE2:
3248                     switch(c1){
3249                     case 0x80:
3250                         if(c0 == 0x95) return 1;
3251                         break;
3252                     case 0x88:
3253                         if(c0 == 0xA5) return 1;
3254                         break;
3255                     }
3256                     break;
3257                 case 0xEF:
3258                     switch(c1){
3259                     case 0xBC:
3260                         if(c0 == 0x8D) return 1;
3261                         break;
3262                     case 0xBD:
3263                         if(c0 == 0x9E && cp51932_f) return 1;
3264                         break;
3265                     case 0xBF:
3266                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3267                         break;
3268                     }
3269                     break;
3270                 }
3271             }
3272         }
3273         ppp =
3274             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3275             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3276             utf8_to_euc_3bytes;
3277         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3278     }else return -1;
3279 #ifdef SHIFTJIS_CP932
3280     if (!ret && cp51932_f && is_eucg3(*p2)) {
3281         nkf_char s2, s1;
3282         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3283             s2e_conv(s2, s1, p2, p1);
3284         }else{
3285             ret = 1;
3286         }
3287     }
3288 #endif
3289     return ret;
3290 }
3291
3292 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3293 {
3294     nkf_char c2;
3295     const unsigned short *p;
3296     unsigned short val;
3297
3298     if (pp == 0) return 1;
3299
3300     c1 -= 0x80;
3301     if (c1 < 0 || psize <= c1) return 1;
3302     p = pp[c1];
3303     if (p == 0)  return 1;
3304
3305     c0 -= 0x80;
3306     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3307     val = p[c0];
3308     if (val == 0) return 1;
3309     if (no_cp932ext_f && (
3310         (val>>8) == 0x2D || /* NEC special characters */
3311         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3312         )) return 1;
3313
3314     c2 = val >> 8;
3315    if (val > 0x7FFF){
3316         c2 &= 0x7f;
3317         c2 |= PREFIX_EUCG3;
3318     }
3319     if (c2 == SO) c2 = X0201;
3320     c1 = val & 0x7f;
3321     if (p2) *p2 = c2;
3322     if (p1) *p1 = c1;
3323     return 0;
3324 }
3325
3326 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3327 {
3328     const char *hex = "0123456789ABCDEF";
3329     int shift = 20;
3330     c &= VALUE_MASK;
3331     while(shift >= 0){
3332         if(c >= 1<<shift){
3333             while(shift >= 0){
3334                 (*f)(0, hex[(c>>shift)&0xF]);
3335                 shift -= 4;
3336             }
3337         }else{
3338             shift -= 4;
3339         }
3340     }
3341     return;
3342 }
3343
3344 void encode_fallback_html(nkf_char c)
3345 {
3346     (*oconv)(0, '&');
3347     (*oconv)(0, '#');
3348     c &= VALUE_MASK;
3349     if(c >= NKF_INT32_C(1000000))
3350         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3351     if(c >= NKF_INT32_C(100000))
3352         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3353     if(c >= 10000)
3354         (*oconv)(0, 0x30+(c/10000  )%10);
3355     if(c >= 1000)
3356         (*oconv)(0, 0x30+(c/1000   )%10);
3357     if(c >= 100)
3358         (*oconv)(0, 0x30+(c/100    )%10);
3359     if(c >= 10)
3360         (*oconv)(0, 0x30+(c/10     )%10);
3361     if(c >= 0)
3362         (*oconv)(0, 0x30+ c         %10);
3363     (*oconv)(0, ';');
3364     return;
3365 }
3366
3367 void encode_fallback_xml(nkf_char c)
3368 {
3369     (*oconv)(0, '&');
3370     (*oconv)(0, '#');
3371     (*oconv)(0, 'x');
3372     nkf_each_char_to_hex(oconv, c);
3373     (*oconv)(0, ';');
3374     return;
3375 }
3376
3377 void encode_fallback_java(nkf_char c)
3378 {
3379     const char *hex = "0123456789ABCDEF";
3380     (*oconv)(0, '\\');
3381     c &= VALUE_MASK;
3382     if(!is_unicode_bmp(c)){
3383         (*oconv)(0, 'U');
3384         (*oconv)(0, '0');
3385         (*oconv)(0, '0');
3386         (*oconv)(0, hex[(c>>20)&0xF]);
3387         (*oconv)(0, hex[(c>>16)&0xF]);
3388     }else{
3389         (*oconv)(0, 'u');
3390     }
3391     (*oconv)(0, hex[(c>>12)&0xF]);
3392     (*oconv)(0, hex[(c>> 8)&0xF]);
3393     (*oconv)(0, hex[(c>> 4)&0xF]);
3394     (*oconv)(0, hex[ c     &0xF]);
3395     return;
3396 }
3397
3398 void encode_fallback_perl(nkf_char c)
3399 {
3400     (*oconv)(0, '\\');
3401     (*oconv)(0, 'x');
3402     (*oconv)(0, '{');
3403     nkf_each_char_to_hex(oconv, c);
3404     (*oconv)(0, '}');
3405     return;
3406 }
3407
3408 void encode_fallback_subchar(nkf_char c)
3409 {
3410     c = unicode_subchar;
3411     (*oconv)((c>>8)&0xFF, c&0xFF);
3412     return;
3413 }
3414 #endif
3415
3416 #ifdef UTF8_OUTPUT_ENABLE
3417 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3418 {
3419 #if 0
3420     extern const unsigned short euc_to_utf8_1byte[];
3421     extern const unsigned short *const euc_to_utf8_2bytes[];
3422     extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3423     extern const unsigned short *const x0212_to_utf8_2bytes[];
3424 #endif
3425     const unsigned short *p;
3426
3427     if (c2 == X0201) {
3428         p = euc_to_utf8_1byte;
3429 #ifdef X0212_ENABLE
3430     } else if (is_eucg3(c2)){
3431         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3432             return 0xA6;
3433         }
3434         c2 = (c2&0x7f) - 0x21;
3435         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3436             p = x0212_to_utf8_2bytes[c2];
3437         else
3438             return 0;
3439 #endif
3440     } else {
3441         c2 &= 0x7f;
3442         c2 = (c2&0x7f) - 0x21;
3443         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3444             p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3445         else
3446             return 0;
3447     }
3448     if (!p) return 0;
3449     c1 = (c1 & 0x7f) - 0x21;
3450     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3451         return p[c1];
3452     return 0;
3453 }
3454
3455 void w_oconv(nkf_char c2, nkf_char c1)
3456 {
3457     nkf_char c0;
3458     nkf_char val;
3459     if (c2 == EOF) {
3460         (*o_putc)(EOF);
3461         return;
3462     }
3463
3464     if (unicode_bom_f==2) {
3465         (*o_putc)('\357');
3466         (*o_putc)('\273');
3467         (*o_putc)('\277');
3468         unicode_bom_f=1;
3469     }
3470
3471 #ifdef NUMCHAR_OPTION
3472     if (c2 == 0 && is_unicode_capsule(c1)){
3473         val &= VALUE_MASK;
3474         if (val < 0x80){
3475             (*o_putc)(val);
3476         }else if (val < 0x800){
3477             (*o_putc)(0xC0 | (val >> 6));
3478             (*o_putc)(0x80 | (val & 0x3f));
3479         } else if (val <= NKF_INT32_C(0xFFFF)) {
3480             (*o_putc)(0xE0 | (val >> 12));
3481             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3482             (*o_putc)(0x80 | (val        & 0x3f));
3483         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3484             (*o_putc)(0xE0 | ( val>>18));
3485             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3486             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3487             (*o_putc)(0x80 | ( val      & 0x3f));
3488         }
3489         return;
3490     }
3491 #endif
3492
3493     if (c2 == 0) { 
3494         output_mode = ASCII;
3495         (*o_putc)(c1);
3496     } else if (c2 == ISO8859_1) {
3497         output_mode = ISO8859_1;
3498         (*o_putc)(c1 | 0x080);
3499     } else {
3500         output_mode = UTF8;
3501         val = e2w_conv(c2, c1);
3502         if (val){
3503             w16w_conv(val, &c2, &c1, &c0);
3504             (*o_putc)(c2);
3505             if (c1){
3506                 (*o_putc)(c1);
3507                 if (c0) (*o_putc)(c0);
3508             }
3509         }
3510     }
3511 }
3512
3513 void w_oconv16(nkf_char c2, nkf_char c1)
3514 {
3515     if (c2 == EOF) {
3516         (*o_putc)(EOF);
3517         return;
3518     }    
3519
3520     if (unicode_bom_f==2) {
3521         if (w_oconv16_LE){
3522             (*o_putc)((unsigned char)'\377');
3523             (*o_putc)('\376');
3524         }else{
3525             (*o_putc)('\376');
3526             (*o_putc)((unsigned char)'\377');
3527         }
3528         unicode_bom_f=1;
3529     }
3530
3531     if (c2 == ISO8859_1) {
3532         c2 = 0;
3533         c1 |= 0x80;
3534 #ifdef NUMCHAR_OPTION
3535     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3536         if (is_unicode_bmp(c1)) {
3537             c2 = (c1 >> 8) & 0xff;
3538             c1 &= 0xff;
3539         } else {
3540             c1 &= VALUE_MASK;
3541             if (c1 <= UNICODE_MAX) {
3542                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3543                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3544                 if (w_oconv16_LE){
3545                     (*o_putc)(c2 & 0xff);
3546                     (*o_putc)((c2 >> 8) & 0xff);
3547                     (*o_putc)(c1 & 0xff);
3548                     (*o_putc)((c1 >> 8) & 0xff);
3549                 }else{
3550                     (*o_putc)(c2 & 0xff);
3551                     (*o_putc)((c2 >> 8) & 0xff);
3552                     (*o_putc)(c1 & 0xff);
3553                     (*o_putc)((c1 >> 8) & 0xff);
3554                 }
3555             }
3556             return;
3557         }
3558 #endif
3559     } else if (c2) {
3560         nkf_char val = e2w_conv(c2, c1);
3561         c2 = (val >> 8) & 0xff;
3562         c1 = val & 0xff;
3563     }
3564     if (w_oconv16_LE){
3565         (*o_putc)(c1);
3566         (*o_putc)(c2);
3567     }else{
3568         (*o_putc)(c2);
3569         (*o_putc)(c1);
3570     }
3571 }
3572
3573 #endif
3574
3575 void e_oconv(nkf_char c2, nkf_char c1)
3576 {
3577 #ifdef NUMCHAR_OPTION
3578     if (c2 == 0 && is_unicode_capsule(c1)){
3579         w16e_conv(c1, &c2, &c1);
3580         if (c2 == 0 && is_unicode_capsule(c1)){
3581             if(encode_fallback)(*encode_fallback)(c1);
3582             return;
3583         }
3584     }
3585 #endif
3586     if (c2 == EOF) {
3587         (*o_putc)(EOF);
3588         return;
3589     } else if (c2 == 0) { 
3590         output_mode = ASCII;
3591         (*o_putc)(c1);
3592     } else if (c2 == X0201) {
3593         output_mode = JAPANESE_EUC;
3594         (*o_putc)(SSO); (*o_putc)(c1|0x80);
3595     } else if (c2 == ISO8859_1) {
3596         output_mode = ISO8859_1;
3597         (*o_putc)(c1 | 0x080);
3598 #ifdef X0212_ENABLE
3599     } else if (is_eucg3(c2)){
3600         output_mode = JAPANESE_EUC;
3601 #ifdef SHIFTJIS_CP932
3602         if (cp51932_f){
3603             nkf_char s2, s1;
3604             if (e2s_conv(c2, c1, &s2, &s1) == 0){
3605                 s2e_conv(s2, s1, &c2, &c1);
3606             }
3607         }
3608 #endif
3609         if (c2 == 0) {
3610             output_mode = ASCII;
3611             (*o_putc)(c1);
3612         }else if (is_eucg3(c2)){
3613             if (x0212_f){
3614                 (*o_putc)(0x8f);
3615                 (*o_putc)((c2 & 0x7f) | 0x080);
3616                 (*o_putc)(c1 | 0x080);
3617             }
3618         }else{
3619             (*o_putc)((c2 & 0x7f) | 0x080);
3620             (*o_putc)(c1 | 0x080);
3621         }
3622 #endif
3623     } else {
3624         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
3625             set_iconv(FALSE, 0);
3626             return; /* too late to rescue this char */
3627         }
3628         output_mode = JAPANESE_EUC;
3629         (*o_putc)(c2 | 0x080);
3630         (*o_putc)(c1 | 0x080);
3631     }
3632 }
3633
3634 #ifdef X0212_ENABLE
3635 nkf_char x0212_shift(nkf_char c)
3636 {
3637     nkf_char ret = c;
3638     c &= 0x7f;
3639     if (is_eucg3(ret)){
3640         if (0x75 <= c && c <= 0x7f){
3641             ret = c + (0x109 - 0x75);
3642         }
3643     }else{
3644         if (0x75 <= c && c <= 0x7f){