OSDN Git Service

* X0201_DEFAULT gives whether JIS X 0201 Katakana will be converted or not.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * nkf is currently maintained at SourceForge.
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.150 2007/11/30 15:59:05 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-11-30"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #ifndef MIME_DECODE_DEFAULT
44 #define MIME_DECODE_DEFAULT STRICT_MIME
45 #endif
46 #ifndef X0201_DEFAULT
47 #define X0201_DEFAULT TRUE
48 #endif
49
50 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
51 #define MSDOS
52 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
53 #define __WIN32__
54 #endif
55 #endif
56
57 #ifdef PERL_XS
58 #undef OVERWRITE
59 #endif
60
61 #ifndef PERL_XS
62 #include <stdio.h>
63 #endif
64
65 #include <stdlib.h>
66 #include <string.h>
67
68 #if defined(MSDOS) || defined(__OS2__)
69 #include <fcntl.h>
70 #include <io.h>
71 #if defined(_MSC_VER) || defined(__WATCOMC__)
72 #define mktemp _mktemp
73 #endif
74 #endif
75
76 #ifdef MSDOS
77 #ifdef LSI_C
78 #define setbinmode(fp) fsetbin(fp)
79 #elif defined(__DJGPP__)
80 #include <libc/dosio.h>
81 #define setbinmode(fp) djgpp_setbinmode(fp)
82 #else /* Microsoft C, Turbo C */
83 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
84 #endif
85 #else /* UNIX */
86 #define setbinmode(fp)
87 #endif
88
89 #if defined(__DJGPP__)
90 void  djgpp_setbinmode(FILE *fp)
91 {
92     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
93     int fd, m;
94     fd = fileno(fp);
95     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
96     __file_handle_set(fd, m);
97 }
98 #endif
99
100 #ifdef _IOFBF /* SysV and MSDOS, Windows */
101 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
102 #else /* BSD */
103 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
104 #endif
105
106 /*Borland C++ 4.5 EasyWin*/
107 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
108 #define         EASYWIN
109 #ifndef __WIN16__
110 #define __WIN16__
111 #endif
112 #include <windows.h>
113 #endif
114
115 #ifdef OVERWRITE
116 /* added by satoru@isoternet.org */
117 #if defined(__EMX__)
118 #include <sys/types.h>
119 #endif
120 #include <sys/stat.h>
121 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
122 #include <unistd.h>
123 #if defined(__WATCOMC__)
124 #include <sys/utime.h>
125 #else
126 #include <utime.h>
127 #endif
128 #else /* defined(MSDOS) */
129 #ifdef __WIN32__
130 #ifdef __BORLANDC__ /* BCC32 */
131 #include <utime.h>
132 #else /* !defined(__BORLANDC__) */
133 #include <sys/utime.h>
134 #endif /* (__BORLANDC__) */
135 #else /* !defined(__WIN32__) */
136 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
137 #include <sys/utime.h>
138 #elif defined(__TURBOC__) /* BCC */
139 #include <utime.h>
140 #elif defined(LSI_C) /* LSI C */
141 #endif /* (__WIN32__) */
142 #endif
143 #endif
144 #endif
145
146 #define         FALSE   0
147 #define         TRUE    1
148
149 /* state of output_mode and input_mode
150
151    c2           0 means ASCII
152                 X0201
153                 ISO8859_1
154                 X0208
155                 EOF      all termination
156    c1           32bit data
157
158  */
159
160 #define         ASCII           0
161 #define         X0208           1
162 #define         X0201           2
163 #define         ISO8859_1       8
164 #define         X0212      0x2844
165 #define         X0213_1    0x284F
166 #define         X0213_2    0x2850
167
168 /* Input Assumption */
169
170 #define         JIS_INPUT       4
171 #define         EUC_INPUT      16
172 #define         SJIS_INPUT      5
173 #define         LATIN1_INPUT    6
174 #define         FIXED_MIME      7
175 #define         STRICT_MIME     8
176
177 /* MIME ENCODE */
178
179 #define         ISO2022JP       9
180 #define         JAPANESE_EUC   10
181 #define         SHIFT_JIS      11
182
183 #define         UTF8           12
184 #define         UTF8_INPUT     13
185 #define         UTF16_INPUT    1015
186 #define         UTF32_INPUT    1017
187
188 /* byte order */
189
190 #define         ENDIAN_BIG      1234
191 #define         ENDIAN_LITTLE   4321
192 #define         ENDIAN_2143     2143
193 #define         ENDIAN_3412     3412
194
/* ASCII CODE */

#define         BS      0x08
#define         TAB     0x09
#define         LF      0x0a
#define         CR      0x0d
#define         ESC     0x1b
#define         SP      0x20
#define         AT      0x40
#define         SSP     0xa0
#define         DEL     0x7f
#define         SI      0x0f
#define         SO      0x0e
#define         SSO     0x8e
#define         SS3     0x8f
#define         CRLF    0x0D0A

/*
 * Locale-independent character classification and conversion macros.
 *
 * Every use of a macro parameter is parenthesized so that an expression
 * argument (e.g. `a | b` or `x + 1`) is treated as a single operand.
 * The parameters are still evaluated more than once, so never pass an
 * expression with side effects (such as `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the high byte of the 16-bit value c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and every character strictly between SP and DEL
 * other than  ? = _ ( ) . and the double quote (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
234
/* Lead-byte ranges used by the CP932 tables. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The parameter is parenthesized so expression arguments are compared as a
 * whole; it is evaluated twice, so avoid arguments with side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
240
241 #define         HOLD_SIZE       1024
242 #if defined(INT_IS_SHORT)
243 #define         IOBUF_SIZE      2048
244 #else
245 #define         IOBUF_SIZE      16384
246 #endif
247
248 #define         DEFAULT_J       'B'
249 #define         DEFAULT_R       'B'
250
251 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
252 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
253
254 #define         RANGE_NUM_MAX   18
255 #define         GETA1   0x22
256 #define         GETA2   0x2e
257
258
259 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
260 #define sizeof_euc_to_utf8_1byte 94
261 #define sizeof_euc_to_utf8_2bytes 94
262 #define sizeof_utf8_to_euc_C2 64
263 #define sizeof_utf8_to_euc_E5B8 64
264 #define sizeof_utf8_to_euc_2bytes 112
265 #define sizeof_utf8_to_euc_3bytes 16
266 #endif
267
268 /* MIME preprocessor */
269
270 #ifdef EASYWIN /*Easy Win */
271 extern POINT _BufferSize;
272 #endif
273
274 struct input_code{
275     char *name;
276     nkf_char stat;
277     nkf_char score;
278     nkf_char index;
279     nkf_char buf[3];
280     void (*status_func)(struct input_code *, nkf_char);
281     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
282     int _file_stat;
283 };
284
285 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
286
287 #ifndef PERL_XS
288 static const char *CopyRight = COPY_RIGHT;
289 #endif
290 #if !defined(PERL_XS) && !defined(WIN32DLL)
291 static  nkf_char     noconvert(FILE *f);
292 #endif
293 static  void    module_connection(void);
294 static  nkf_char     kanji_convert(FILE *f);
295 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
296 static  nkf_char     push_hold_buf(nkf_char c2);
297 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
298 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
299 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
300 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
301 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
302 /* UCS Mapping
303  * 0: Shift_JIS, eucJP-ascii
304  * 1: eucJP-ms
305  * 2: CP932, CP51932
306  * 3: CP10001
307  */
308 #define UCS_MAP_ASCII   0
309 #define UCS_MAP_MS      1
310 #define UCS_MAP_CP932   2
311 #define UCS_MAP_CP10001 3
312 static int ms_ucs_map_f = UCS_MAP_ASCII;
313 #endif
314 #ifdef UTF8_INPUT_ENABLE
315 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
316 static  int     no_cp932ext_f = FALSE;
317 /* ignore ZERO WIDTH NO-BREAK SPACE */
318 static  int     no_best_fit_chars_f = FALSE;
319 static  int     input_endian = ENDIAN_BIG;
320 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
321 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
322 static  void    encode_fallback_html(nkf_char c);
323 static  void    encode_fallback_xml(nkf_char c);
324 static  void    encode_fallback_java(nkf_char c);
325 static  void    encode_fallback_perl(nkf_char c);
326 static  void    encode_fallback_subchar(nkf_char c);
327 static  void    (*encode_fallback)(nkf_char c) = NULL;
328 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
329 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
330 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
331 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
332 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
333 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
334 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
335 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
336 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
337 static  void    w_status(struct input_code *, nkf_char);
338 #endif
339 #ifdef UTF8_OUTPUT_ENABLE
340 static  int     output_bom_f = FALSE;
341 static  int     output_endian = ENDIAN_BIG;
342 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
343 static  void    w_oconv(nkf_char c2,nkf_char c1);
344 static  void    w_oconv16(nkf_char c2,nkf_char c1);
345 static  void    w_oconv32(nkf_char c2,nkf_char c1);
346 #endif
347 static  void    e_oconv(nkf_char c2,nkf_char c1);
348 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
349 static  void    s_oconv(nkf_char c2,nkf_char c1);
350 static  void    j_oconv(nkf_char c2,nkf_char c1);
351 static  void    fold_conv(nkf_char c2,nkf_char c1);
352 static  void    nl_conv(nkf_char c2,nkf_char c1);
353 static  void    z_conv(nkf_char c2,nkf_char c1);
354 static  void    rot_conv(nkf_char c2,nkf_char c1);
355 static  void    hira_conv(nkf_char c2,nkf_char c1);
356 static  void    base64_conv(nkf_char c2,nkf_char c1);
357 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
358 static  void    no_connection(nkf_char c2,nkf_char c1);
359 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
360
361 static  void    code_score(struct input_code *ptr);
362 static  void    code_status(nkf_char c);
363
364 static  void    std_putc(nkf_char c);
365 static  nkf_char     std_getc(FILE *f);
366 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
367
368 static  nkf_char     broken_getc(FILE *f);
369 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
370
371 static  nkf_char     mime_begin(FILE *f);
372 static  nkf_char     mime_getc(FILE *f);
373 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
374
375 static  void    switch_mime_getc(void);
376 static  void    unswitch_mime_getc(void);
377 static  nkf_char     mime_begin_strict(FILE *f);
378 static  nkf_char     mime_getc_buf(FILE *f);
379 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
380 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
381
382 static  nkf_char     base64decode(nkf_char c);
383 static  void    mime_prechar(nkf_char c2, nkf_char c1);
384 static  void    mime_putc(nkf_char c);
385 static  void    open_mime(nkf_char c);
386 static  void    close_mime(void);
387 static  void    eof_mime(void);
388 static  void    mimeout_addchar(nkf_char c);
389 #ifndef PERL_XS
390 static  void    usage(void);
391 static  void    version(void);
392 #endif
393 static  void    options(unsigned char *c);
394 static  void    reinit(void);
395
396 /* buffers */
397
398 #if !defined(PERL_XS) && !defined(WIN32DLL)
399 static unsigned char   stdibuf[IOBUF_SIZE];
400 static unsigned char   stdobuf[IOBUF_SIZE];
401 #endif
402 static unsigned char   hold_buf[HOLD_SIZE*2];
403 static int             hold_count = 0;
404
405 /* MIME preprocessor fifo */
406
407 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
408 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
409 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
410 static unsigned char           mime_buf[MIME_BUF_SIZE];
411 static unsigned int            mime_top = 0;
412 static unsigned int            mime_last = 0;  /* decoded */
413 static unsigned int            mime_input = 0; /* undecoded */
414 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
415
416 /* flags */
417 static int             unbuf_f = FALSE;
418 static int             estab_f = FALSE;
419 static int             nop_f = FALSE;
420 static int             binmode_f = TRUE;       /* binary mode */
421 static int             rot_f = FALSE;          /* rot14/43 mode */
422 static int             hira_f = FALSE;          /* hira/kata henkan */
423 static int             input_f = FALSE;        /* non fixed input code  */
424 static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
425 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
426 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
427 static int             mimebuf_f = FALSE;      /* MIME buffered input */
428 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
429 static int             iso8859_f = FALSE;      /* ISO8859 through */
430 static int             mimeout_f = FALSE;       /* base64 mode */
431 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
432 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
433
434 #ifdef UNICODE_NORMALIZATION
435 static int nfc_f = FALSE;
436 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
437 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
438 static nkf_char nfc_getc(FILE *f);
439 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
440 #endif
441
442 #ifdef INPUT_OPTION
443 static int cap_f = FALSE;
444 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
445 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
446 static nkf_char cap_getc(FILE *f);
447 static nkf_char cap_ungetc(nkf_char c,FILE *f);
448
449 static int url_f = FALSE;
450 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
451 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
452 static nkf_char url_getc(FILE *f);
453 static nkf_char url_ungetc(nkf_char c,FILE *f);
454 #endif
455
/*
 * NKF_INT32_C(n) appends an L suffix to the constant when int is only
 * 16 bits wide (INT_IS_SHORT), so the constants below keep their value.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The top byte of a value (CLASS_MASK) is a class tag and the low 24 bits
 * (VALUE_MASK) are the payload.  The parameter is parenthesized so that an
 * argument such as `CLASS_UNICODE | cp` is masked as a whole rather than
 * having `&` bind to its last operand only.
 */
/* True when the class tag of c is CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
468
469 #ifdef NUMCHAR_OPTION
470 static int numchar_f = FALSE;
471 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
472 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
473 static nkf_char numchar_getc(FILE *f);
474 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
475 #endif
476
477 #ifdef CHECK_OPTION
478 static int noout_f = FALSE;
479 static void no_putc(nkf_char c);
480 static int debug_f = FALSE;
481 static void debug(const char *str);
482 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
483 #endif
484
485 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
486 #if !defined PERL_XS
487 static  void    print_guessed_code(char *filename);
488 #endif
489 static  void    set_input_codename(char *codename);
490
491 #ifdef EXEC_IO
492 static int exec_f = 0;
493 #endif
494
495 #ifdef SHIFTJIS_CP932
496 /* invert IBM extended characters to others */
497 static int cp51932_f = FALSE;
498
499 /* invert NEC-selected IBM extended characters to IBM extended characters */
500 static int cp932inv_f = TRUE;
501
502 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
503 #endif /* SHIFTJIS_CP932 */
504
505 #ifdef X0212_ENABLE
506 static int x0212_f = FALSE;
507 static nkf_char x0212_shift(nkf_char c);
508 static nkf_char x0212_unshift(nkf_char c);
509 #endif
510 static int x0213_f = FALSE;
511
512 static unsigned char prefix_table[256];
513
514 static void set_code_score(struct input_code *ptr, nkf_char score);
515 static void clr_code_score(struct input_code *ptr, nkf_char score);
516 static void status_disable(struct input_code *ptr);
517 static void status_push_ch(struct input_code *ptr, nkf_char c);
518 static void status_clear(struct input_code *ptr);
519 static void status_reset(struct input_code *ptr);
520 static void status_reinit(struct input_code *ptr);
521 static void status_check(struct input_code *ptr, nkf_char c);
522 static void e_status(struct input_code *, nkf_char);
523 static void s_status(struct input_code *, nkf_char);
524
525 struct input_code input_code_list[] = {
526     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
527     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
528 #ifdef UTF8_INPUT_ENABLE
529     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
530     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
531     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
532 #endif
533     {0}
534 };
535
536 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
537 static int              base64_count = 0;
538
539 /* X0208 -> ASCII converter */
540
541 /* fold parameter */
542 static int             f_line = 0;    /* chars in line */
543 static int             f_prev = 0;
544 static int             fold_preserve_f = FALSE; /* preserve new lines */
545 static int             fold_f  = FALSE;
546 static int             fold_len  = 0;
547
548 /* options */
549 static unsigned char   kanji_intro = DEFAULT_J;
550 static unsigned char   ascii_intro = DEFAULT_R;
551
552 /* Folding */
553
554 #define FOLD_MARGIN  10
555 #define DEFAULT_FOLD 60
556
557 static int             fold_margin  = FOLD_MARGIN;
558
559 /* converters */
560
561 #ifdef DEFAULT_CODE_JIS
562 #   define  DEFAULT_CONV j_oconv
563 #endif
564 #ifdef DEFAULT_CODE_SJIS
565 #   define  DEFAULT_CONV s_oconv
566 #endif
567 #ifdef DEFAULT_CODE_EUC
568 #   define  DEFAULT_CONV e_oconv
569 #endif
570 #ifdef DEFAULT_CODE_UTF8
571 #   define  DEFAULT_CONV w_oconv
572 #endif
573
574 /* process default */
575 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
576
577 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
578 /* s_iconv or oconv */
579 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
580
581 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
587 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
588
589 /* static redirections */
590
591 static  void   (*o_putc)(nkf_char c) = std_putc;
592
593 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
594 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
595
596 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
597 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
598
599 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
600
601 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
602 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
603
604 /* for strict mime */
605 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
606 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
607
608 /* Global states */
609 static int output_mode = ASCII,    /* output kanji mode */
610            input_mode =  ASCII,    /* input kanji mode */
611            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
612 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
613
614 /* X0201 / X0208 conversion tables */
615
/*
 * X0201 kana conversion table.
 * 90-9F A0-DF
 *
 * 130 bytes.  The values read as consecutive two-byte groups (mostly
 * 0x21,xx or 0x25,xx) followed by a 0x00,0x00 pair; the code that indexes
 * this table is outside this excerpt, so confirm the layout there.
 * The number in each row comment is the byte offset of the row's first entry.
 */
static const unsigned char cv[]= {
    /*   0 */ 0x21,0x21,  0x21,0x23,  0x21,0x56,  0x21,0x57,
    /*   8 */ 0x21,0x22,  0x21,0x26,  0x25,0x72,  0x25,0x21,
    /*  16 */ 0x25,0x23,  0x25,0x25,  0x25,0x27,  0x25,0x29,
    /*  24 */ 0x25,0x63,  0x25,0x65,  0x25,0x67,  0x25,0x43,
    /*  32 */ 0x21,0x3c,  0x25,0x22,  0x25,0x24,  0x25,0x26,
    /*  40 */ 0x25,0x28,  0x25,0x2a,  0x25,0x2b,  0x25,0x2d,
    /*  48 */ 0x25,0x2f,  0x25,0x31,  0x25,0x33,  0x25,0x35,
    /*  56 */ 0x25,0x37,  0x25,0x39,  0x25,0x3b,  0x25,0x3d,
    /*  64 */ 0x25,0x3f,  0x25,0x41,  0x25,0x44,  0x25,0x46,
    /*  72 */ 0x25,0x48,  0x25,0x4a,  0x25,0x4b,  0x25,0x4c,
    /*  80 */ 0x25,0x4d,  0x25,0x4e,  0x25,0x4f,  0x25,0x52,
    /*  88 */ 0x25,0x55,  0x25,0x58,  0x25,0x5b,  0x25,0x5e,
    /*  96 */ 0x25,0x5f,  0x25,0x60,  0x25,0x61,  0x25,0x62,
    /* 104 */ 0x25,0x64,  0x25,0x66,  0x25,0x68,  0x25,0x69,
    /* 112 */ 0x25,0x6a,  0x25,0x6b,  0x25,0x6c,  0x25,0x6d,
    /* 120 */ 0x25,0x6f,  0x25,0x73,  0x21,0x2b,  0x21,0x2c,
    /* 128 */ 0x00,0x00
};
636
637
/*
 * X0201 kana conversion table for dakuten (voiced sound mark).
 * 90-9F A0-DF
 *
 * Same size (130 bytes) and two-byte grouping as cv; a 0x00,0x00 group
 * presumably means "no voiced form" -- confirm in the code that reads it.
 * The number in each row comment is the byte offset of the row's first entry.
 */
static const unsigned char dv[]= {
    /*   0 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*   8 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  16 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  24 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  32 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x25,0x74,
    /*  40 */ 0x00,0x00,  0x00,0x00,  0x25,0x2c,  0x25,0x2e,
    /*  48 */ 0x25,0x30,  0x25,0x32,  0x25,0x34,  0x25,0x36,
    /*  56 */ 0x25,0x38,  0x25,0x3a,  0x25,0x3c,  0x25,0x3e,
    /*  64 */ 0x25,0x40,  0x25,0x42,  0x25,0x45,  0x25,0x47,
    /*  72 */ 0x25,0x49,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  80 */ 0x00,0x00,  0x00,0x00,  0x25,0x50,  0x25,0x53,
    /*  88 */ 0x25,0x56,  0x25,0x59,  0x25,0x5c,  0x00,0x00,
    /*  96 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 104 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 112 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 120 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 128 */ 0x00,0x00
};
658
/*
 * X0201 kana conversion table for han-dakuten (semi-voiced sound mark).
 * 90-9F A0-DF
 *
 * Same size (130 bytes) and two-byte grouping as cv and dv; only five
 * groups (byte offsets 84..93) are non-zero.
 * The number in each row comment is the byte offset of the row's first entry.
 */
static const unsigned char ev[]= {
    /*   0 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*   8 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  16 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  24 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  32 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  40 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  48 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  56 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  64 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  72 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /*  80 */ 0x00,0x00,  0x00,0x00,  0x25,0x51,  0x25,0x54,
    /*  88 */ 0x25,0x57,  0x25,0x5a,  0x25,0x5d,  0x00,0x00,
    /*  96 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 104 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 112 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 120 */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* 128 */ 0x00,0x00
};
679
680
/*
 * X0208 kigou (symbol) conversion table.
 * 0x8140 - 0x819e
 *
 * 96 single-byte entries.  Non-zero entries are ASCII punctuation codes;
 * 0x00 presumably marks a symbol with no ASCII counterpart -- confirm in
 * the code that reads this table (outside this excerpt).
 * The number in each row comment is the index of the row's first entry.
 */
static const unsigned char fv[] = {
    /*  0 */ 0x00, 0x00, 0x00, 0x00, 0x2c, 0x2e, 0x00, 0x3a,
    /*  8 */ 0x3b, 0x3f, 0x21, 0x00, 0x00, 0x27, 0x60, 0x00,
    /* 16 */ 0x5e, 0x00, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 24 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x2f,
    /* 32 */ 0x5c, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x60, 0x27,
    /* 40 */ 0x22, 0x22, 0x28, 0x29, 0x00, 0x00, 0x5b, 0x5d,
    /* 48 */ 0x7b, 0x7d, 0x3c, 0x3e, 0x00, 0x00, 0x00, 0x00,
    /* 56 */ 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2d, 0x00, 0x00,
    /* 64 */ 0x00, 0x3d, 0x00, 0x3c, 0x3e, 0x00, 0x00, 0x00,
    /* 72 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 80 */ 0x24, 0x00, 0x00, 0x25, 0x23, 0x26, 0x2a, 0x40,
    /* 88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
698
699
700
701 static int             file_out_f = FALSE;
702 #ifdef OVERWRITE
703 static int             overwrite_f = FALSE;
704 static int             preserve_time_f = FALSE;
705 static int             backup_f = FALSE;
706 static char            *backup_suffix = "";
707 static char *get_backup_filename(const char *suffix, const char *filename);
708 #endif
709
710 static int nlmode_f = 0;   /* CR, LF, CRLF */
711 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
712 static nkf_char prev_cr = 0; /* CR or 0 */
713 #ifdef EASYWIN /*Easy Win */
714 static int             end_check;
715 #endif /*Easy Win */
716
717 #define STD_GC_BUFSIZE (256)
718 nkf_char std_gc_buf[STD_GC_BUFSIZE];
719 nkf_char std_gc_ndx;
720
721 #ifdef WIN32DLL
722 #include "nkf32dll.c"
723 #elif defined(PERL_XS)
724 #else /* WIN32DLL */
725 int main(int argc, char **argv)
726 {
727     FILE  *fin;
728     unsigned char  *cp;
729
730     char *outfname = NULL;
731     char *origfname;
732
733 #ifdef EASYWIN /*Easy Win */
734     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
735 #endif
736
737     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
738         cp = (unsigned char *)*argv;
739         options(cp);
740         if (guess_f) {
741 #ifdef CHECK_OPTION
742             int debug_f_back = debug_f;
743 #endif
744 #ifdef EXEC_IO
745             int exec_f_back = exec_f;
746 #endif
747 #ifdef X0212_ENABLE
748             int x0212_f_back = x0212_f;
749 #endif
750             int x0213_f_back = x0213_f;
751             int guess_f_back = guess_f;
752             reinit();
753             guess_f = guess_f_back;
754             mime_f = FALSE;
755 #ifdef CHECK_OPTION
756             debug_f = debug_f_back;
757 #endif
758 #ifdef EXEC_IO
759             exec_f = exec_f_back;
760 #endif
761 #ifdef X0212_ENABLE
762             x0212_f = x0212_f_back;
763 #endif
764             x0213_f = x0213_f_back;
765         }
766 #ifdef EXEC_IO
767         if (exec_f){
768             int fds[2], pid;
769             if (pipe(fds) < 0 || (pid = fork()) < 0){
770                 abort();
771             }
772             if (pid == 0){
773                 if (exec_f > 0){
774                     close(fds[0]);
775                     dup2(fds[1], 1);
776                 }else{
777                     close(fds[1]);
778                     dup2(fds[0], 0);
779                 }
780                 execvp(argv[1], &argv[1]);
781             }
782             if (exec_f > 0){
783                 close(fds[1]);
784                 dup2(fds[0], 0);
785             }else{
786                 close(fds[0]);
787                 dup2(fds[1], 1);
788             }
789             argc = 0;
790             break;
791         }
792 #endif
793     }
794
795     if (binmode_f == TRUE)
796 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
797     if (freopen("","wb",stdout) == NULL)
798         return (-1);
799 #else
800     setbinmode(stdout);
801 #endif
802
803     if (unbuf_f)
804       setbuf(stdout, (char *) NULL);
805     else
806       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
807
808     if (argc == 0) {
809       if (binmode_f == TRUE)
810 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
811       if (freopen("","rb",stdin) == NULL) return (-1);
812 #else
813       setbinmode(stdin);
814 #endif
815       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
816       if (nop_f)
817           noconvert(stdin);
818       else {
819           kanji_convert(stdin);
820           if (guess_f) print_guessed_code(NULL);
821       }
822     } else {
823       int nfiles = argc;
824         int is_argument_error = FALSE;
825       while (argc--) {
826             input_codename = NULL;
827             input_newline = 0;
828 #ifdef CHECK_OPTION
829             iconv_for_check = 0;
830 #endif
831           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
832               perror(*--argv);
833                 *argv++;
834                 is_argument_error = TRUE;
835                 continue;
836           } else {
837 #ifdef OVERWRITE
838               int fd = 0;
839               int fd_backup = 0;
840 #endif
841
842 /* reopen file for stdout */
843               if (file_out_f == TRUE) {
844 #ifdef OVERWRITE
845                   if (overwrite_f){
846                       outfname = malloc(strlen(origfname)
847                                         + strlen(".nkftmpXXXXXX")
848                                         + 1);
849                       if (!outfname){
850                           perror(origfname);
851                           return -1;
852                       }
853                       strcpy(outfname, origfname);
854 #ifdef MSDOS
855                       {
856                           int i;
857                           for (i = strlen(outfname); i; --i){
858                               if (outfname[i - 1] == '/'
859                                   || outfname[i - 1] == '\\'){
860                                   break;
861                               }
862                           }
863                           outfname[i] = '\0';
864                       }
865                       strcat(outfname, "ntXXXXXX");
866                       mktemp(outfname);
867                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
868                                 S_IREAD | S_IWRITE);
869 #else
870                       strcat(outfname, ".nkftmpXXXXXX");
871                       fd = mkstemp(outfname);
872 #endif
873                       if (fd < 0
874                           || (fd_backup = dup(fileno(stdout))) < 0
875                           || dup2(fd, fileno(stdout)) < 0
876                           ){
877                           perror(origfname);
878                           return -1;
879                       }
880                   }else
881 #endif
882                   if(argc == 1) {
883                       outfname = *argv++;
884                       argc--;
885                   } else {
886                       outfname = "nkf.out";
887                   }
888
889                   if(freopen(outfname, "w", stdout) == NULL) {
890                       perror (outfname);
891                       return (-1);
892                   }
893                   if (binmode_f == TRUE) {
894 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
895                       if (freopen("","wb",stdout) == NULL)
896                            return (-1);
897 #else
898                       setbinmode(stdout);
899 #endif
900                   }
901               }
902               if (binmode_f == TRUE)
903 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
904                  if (freopen("","rb",fin) == NULL)
905                     return (-1);
906 #else
907                  setbinmode(fin);
908 #endif
909               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
910               if (nop_f)
911                   noconvert(fin);
912               else {
913                   char *filename = NULL;
914                   kanji_convert(fin);
915                   if (nfiles > 1) filename = origfname;
916                   if (guess_f) print_guessed_code(filename);
917               }
918               fclose(fin);
919 #ifdef OVERWRITE
920               if (overwrite_f) {
921                   struct stat     sb;
922 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
923                   time_t tb[2];
924 #else
925                   struct utimbuf  tb;
926 #endif
927
928                   fflush(stdout);
929                   close(fd);
930                   if (dup2(fd_backup, fileno(stdout)) < 0){
931                       perror("dup2");
932                   }
933                   if (stat(origfname, &sb)) {
934                       fprintf(stderr, "Can't stat %s\n", origfname);
935                   }
936                   /* restore permissions */
937                   if (chmod(outfname, sb.st_mode)) {
938                       fprintf(stderr, "Can't set permission %s\n", outfname);
939                   }
940
941                   /* restore timestamp */
942                     if(preserve_time_f){
943 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
944                         tb[0] = tb[1] = sb.st_mtime;
945                         if (utime(outfname, tb)) {
946                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
947                         }
948 #else
949                         tb.actime  = sb.st_atime;
950                         tb.modtime = sb.st_mtime;
951                         if (utime(outfname, &tb)) {
952                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
953                         }
954 #endif
955                     }
956                     if(backup_f){
957                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
958 #ifdef MSDOS
959                         unlink(backup_filename);
960 #endif
961                         if (rename(origfname, backup_filename)) {
962                             perror(backup_filename);
963                             fprintf(stderr, "Can't rename %s to %s\n",
964                                     origfname, backup_filename);
965                         }
966                     }else{
967 #ifdef MSDOS
968                         if (unlink(origfname)){
969                             perror(origfname);
970                         }
971 #endif
972                     }
973                   if (rename(outfname, origfname)) {
974                       perror(origfname);
975                       fprintf(stderr, "Can't rename %s to %s\n",
976                               outfname, origfname);
977                   }
978                   free(outfname);
979               }
980 #endif
981           }
982       }
983         if (is_argument_error)
984             return(-1);
985     }
986 #ifdef EASYWIN /*Easy Win */
987     if (file_out_f == FALSE)
988         scanf("%d",&end_check);
989     else
990         fclose(stdout);
991 #else /* for Other OS */
992     if (file_out_f == TRUE)
993         fclose(stdout);
994 #endif /*Easy Win */
995     return (0);
996 }
997 #endif /* WIN32DLL */
998
999 #ifdef OVERWRITE
/*
 * Build the backup file name for FILENAME from the pattern SUFFIX.
 *
 * If SUFFIX contains one or more '*' characters, every '*' is replaced by
 * FILENAME and all other characters of SUFFIX are copied unchanged
 * (suffix "*.orig", filename "a.txt"  ->  "a.txt.orig").
 * If SUFFIX contains no '*', it is appended to FILENAME
 * (suffix ".bak", filename "a.txt"  ->  "a.txt.bak").
 *
 * Both arguments must be non-NULL, NUL-terminated strings.
 *
 * Returns a newly malloc()ed, NUL-terminated string owned by the caller
 * (release it with free()), or NULL after reporting through perror() when
 * the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++){
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count){
        /* each one-byte '*' is replaced by filename_length bytes */
        backup_filename = malloc(suffix_length - asterisk_count
                                 + asterisk_count * filename_length + 1);
        if (!backup_filename){
            perror("Can't malloc backup filename.");
            return NULL;
        }

        for (i = 0, j = 0; i < suffix_length; i++){
            if (suffix[i] == '*'){
                /* the write offset is known, so copy directly instead of
                   re-scanning the partial result as strncat would */
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            }else{
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    }else{
        /* room for filename + suffix + terminating NUL */
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename){
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminating NUL */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1038 #endif
1039
/*
 * Table of long ("--name") command line options, scanned by options().
 *
 *   name   the option text that follows "--".  A trailing '=' marks an
 *          option that carries a value; options() stops comparing at the
 *          '=' and takes the value from the text after it.
 *   alias  a string of short option letters.  When it is non-empty,
 *          options() parses this string in place of the long option and
 *          then resumes after it.  When it is empty, options() handles the
 *          option itself by comparing `name` with strcmp().
 *
 * options() walks the table from the first entry and stops at the first
 * entry that matches, so the order of the entries is significant.
 *
 * NOTE(review): the "fb-*" entries are compiled in under
 * UTF8_OUTPUT_ENABLE alone, and "no-cp932ext" / "no-best-fit-chars" under
 * UTF8_INPUT_ENABLE alone, while the code in options() that acts on them
 * is guarded by both macros together -- confirm that a build with only one
 * of the two enabled behaves as intended.
 */
1040 static const struct {
1041     const char *name;
1042     const char *alias;
1043 } long_option[] = {
1044     {"ic=", ""},
1045     {"oc=", ""},
1046     {"base64","jMB"},
1047     {"euc","e"},
1048     {"euc-input","E"},
1049     {"fj","jm"},
1050     {"help","v"},
1051     {"jis","j"},
1052     {"jis-input","J"},
1053     {"mac","sLm"},
1054     {"mime","jM"},
1055     {"mime-input","m"},
1056     {"msdos","sLw"},
1057     {"sjis","s"},
1058     {"sjis-input","S"},
1059     {"unix","eLu"},
1060     {"version","V"},
1061     {"windows","sLw"},
1062     {"hiragana","h1"},
1063     {"katakana","h2"},
1064     {"katakana-hiragana","h3"},
1065     {"guess=", ""},
1066     {"guess", "g"},
1067     {"cp932", ""},
1068     {"no-cp932", ""},
1069 #ifdef X0212_ENABLE
1070     {"x0212", ""},
1071 #endif
1072 #ifdef UTF8_OUTPUT_ENABLE
1073     {"utf8", "w"},
1074     {"utf16", "w16"},
1075     {"ms-ucs-map", ""},
1076     {"fb-skip", ""},
1077     {"fb-html", ""},
1078     {"fb-xml", ""},
1079     {"fb-perl", ""},
1080     {"fb-java", ""},
1081     {"fb-subchar", ""},
1082     {"fb-subchar=", ""},
1083 #endif
1084 #ifdef UTF8_INPUT_ENABLE
1085     {"utf8-input", "W"},
1086     {"utf16-input", "W16"},
1087     {"no-cp932ext", ""},
1088     {"no-best-fit-chars",""},
1089 #endif
1090 #ifdef UNICODE_NORMALIZATION
1091     {"utf8mac-input", ""},
1092 #endif
1093 #ifdef OVERWRITE
1094     {"overwrite", ""},
1095     {"overwrite=", ""},
1096     {"in-place", ""},
1097     {"in-place=", ""},
1098 #endif
1099 #ifdef INPUT_OPTION
1100     {"cap-input", ""},
1101     {"url-input", ""},
1102 #endif
1103 #ifdef NUMCHAR_OPTION
1104     {"numchar-input", ""},
1105 #endif
1106 #ifdef CHECK_OPTION
1107     {"no-output", ""},
1108     {"debug", ""},
1109 #endif
1110 #ifdef SHIFTJIS_CP932
1111     {"cp932inv", ""},
1112 #endif
1113 #ifdef EXEC_IO
1114     {"exec-in", ""},
1115     {"exec-out", ""},
1116 #endif
1117     {"prefix=", ""},
1118 };
1119
/*
 * Set to 1 by options() when it meets "--" followed by the end of the
 * argument or a space; from then on every call to options() returns
 * immediately without parsing anything.
 */
1120 static int option_mode = 0;
1121
1122 void options(unsigned char *cp)
1123 {
1124     nkf_char i, j;
1125     unsigned char *p;
1126     unsigned char *cp_back = NULL;
1127     char codeset[32];
1128
1129     if (option_mode==1)
1130         return;
1131     while(*cp && *cp++!='-');
1132     while (*cp || cp_back) {
1133         if(!*cp){
1134             cp = cp_back;
1135             cp_back = NULL;
1136             continue;
1137         }
1138         p = 0;
1139         switch (*cp++) {
1140         case '-':  /* literal options */
1141             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1142                 option_mode = 1;
1143                 return;
1144             }
1145             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1146                 p = (unsigned char *)long_option[i].name;
1147                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1148                 if (*p == cp[j] || cp[j] == SP){
1149                     p = &cp[j] + 1;
1150                     break;
1151                 }
1152                 p = 0;
1153             }
1154             if (p == 0) {
1155                 fprintf(stderr, "unknown long option: --%s\n", cp);
1156                 return;
1157             }
1158             while(*cp && *cp != SP && cp++);
1159             if (long_option[i].alias[0]){
1160                 cp_back = cp;
1161                 cp = (unsigned char *)long_option[i].alias;
1162             }else{
1163                 if (strcmp(long_option[i].name, "ic=") == 0){
1164                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1165                         codeset[i] = nkf_toupper(p[i]);
1166                     }
1167                     codeset[i] = 0;
1168                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1169                         input_f = JIS_INPUT;
1170                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1171                       strcmp(codeset, "CP50220") == 0 ||
1172                       strcmp(codeset, "CP50221") == 0 ||
1173                       strcmp(codeset, "CP50222") == 0){
1174                         input_f = JIS_INPUT;
1175 #ifdef SHIFTJIS_CP932
1176                         cp51932_f = TRUE;
1177 #endif
1178 #ifdef UTF8_OUTPUT_ENABLE
1179                         ms_ucs_map_f = UCS_MAP_CP932;
1180 #endif
1181                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1182                         input_f = JIS_INPUT;
1183 #ifdef X0212_ENABLE
1184                         x0212_f = TRUE;
1185 #endif
1186                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1187                         input_f = JIS_INPUT;
1188 #ifdef X0212_ENABLE
1189                         x0212_f = TRUE;
1190 #endif
1191                         x0213_f = TRUE;
1192                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1193                         input_f = SJIS_INPUT;
1194                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1195                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1196                              strcmp(codeset, "CP932") == 0 ||
1197                              strcmp(codeset, "MS932") == 0){
1198                         input_f = SJIS_INPUT;
1199 #ifdef SHIFTJIS_CP932
1200                         cp51932_f = TRUE;
1201 #endif
1202 #ifdef UTF8_OUTPUT_ENABLE
1203                         ms_ucs_map_f = UCS_MAP_CP932;
1204 #endif
1205                     }else if(strcmp(codeset, "CP10001") == 0){
1206                         input_f = SJIS_INPUT;
1207 #ifdef SHIFTJIS_CP932
1208                         cp51932_f = TRUE;
1209 #endif
1210 #ifdef UTF8_OUTPUT_ENABLE
1211                         ms_ucs_map_f = UCS_MAP_CP10001;
1212 #endif
1213                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1214                              strcmp(codeset, "EUC-JP") == 0){
1215                         input_f = EUC_INPUT;
1216                     }else if(strcmp(codeset, "CP51932") == 0){
1217                         input_f = EUC_INPUT;
1218 #ifdef SHIFTJIS_CP932
1219                         cp51932_f = TRUE;
1220 #endif
1221 #ifdef UTF8_OUTPUT_ENABLE
1222                         ms_ucs_map_f = UCS_MAP_CP932;
1223 #endif
1224                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1225                              strcmp(codeset, "EUCJP-MS") == 0 ||
1226                              strcmp(codeset, "EUCJPMS") == 0){
1227                         input_f = EUC_INPUT;
1228 #ifdef SHIFTJIS_CP932
1229                         cp51932_f = FALSE;
1230 #endif
1231 #ifdef UTF8_OUTPUT_ENABLE
1232                         ms_ucs_map_f = UCS_MAP_MS;
1233 #endif
1234                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1235                              strcmp(codeset, "EUCJP-ASCII") == 0){
1236                         input_f = EUC_INPUT;
1237 #ifdef SHIFTJIS_CP932
1238                         cp51932_f = FALSE;
1239 #endif
1240 #ifdef UTF8_OUTPUT_ENABLE
1241                         ms_ucs_map_f = UCS_MAP_ASCII;
1242 #endif
1243                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1244                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1245                         input_f = SJIS_INPUT;
1246                         x0213_f = TRUE;
1247 #ifdef SHIFTJIS_CP932
1248                         cp51932_f = FALSE;
1249 #endif
1250                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1251                              strcmp(codeset, "EUC-JIS-2004") == 0){
1252                         input_f = EUC_INPUT;
1253                         x0213_f = TRUE;
1254 #ifdef SHIFTJIS_CP932
1255                         cp51932_f = FALSE;
1256 #endif
1257 #ifdef UTF8_INPUT_ENABLE
1258                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1259                              strcmp(codeset, "UTF-8N") == 0 ||
1260                              strcmp(codeset, "UTF-8-BOM") == 0){
1261                         input_f = UTF8_INPUT;
1262 #ifdef UNICODE_NORMALIZATION
1263                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1264                              strcmp(codeset, "UTF-8-MAC") == 0){
1265                         input_f = UTF8_INPUT;
1266                         nfc_f = TRUE;
1267 #endif
1268                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1269                              strcmp(codeset, "UTF-16BE") == 0 ||
1270                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1271                         input_f = UTF16_INPUT;
1272                         input_endian = ENDIAN_BIG;
1273                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1274                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1275                         input_f = UTF16_INPUT;
1276                         input_endian = ENDIAN_LITTLE;
1277                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1278                              strcmp(codeset, "UTF-32BE") == 0 ||
1279                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1280                         input_f = UTF32_INPUT;
1281                         input_endian = ENDIAN_BIG;
1282                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1283                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1284                         input_f = UTF32_INPUT;
1285                         input_endian = ENDIAN_LITTLE;
1286 #endif
1287                     } else {
1288                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1289                     }
1290                     continue;
1291                 }
1292                 if (strcmp(long_option[i].name, "oc=") == 0){
1293                     x0201_f = FALSE;
1294                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1295                         codeset[i] = nkf_toupper(p[i]);
1296                     }
1297                     codeset[i] = 0;
1298                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1299                         output_conv = j_oconv;
1300                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1301                         output_conv = j_oconv;
1302                         no_cp932ext_f = TRUE;
1303 #ifdef SHIFTJIS_CP932
1304                         cp932inv_f = FALSE;
1305 #endif
1306 #ifdef UTF8_OUTPUT_ENABLE
1307                         ms_ucs_map_f = UCS_MAP_CP932;
1308 #endif
1309                     }else if(strcmp(codeset, "CP50220") == 0){
1310                         output_conv = j_oconv;
1311                         x0201_f = TRUE;
1312 #ifdef SHIFTJIS_CP932
1313                         cp932inv_f = FALSE;
1314 #endif
1315 #ifdef UTF8_OUTPUT_ENABLE
1316                         ms_ucs_map_f = UCS_MAP_CP932;
1317 #endif
1318                     }else if(strcmp(codeset, "CP50221") == 0){
1319                         output_conv = j_oconv;
1320 #ifdef SHIFTJIS_CP932
1321                         cp932inv_f = FALSE;
1322 #endif
1323 #ifdef UTF8_OUTPUT_ENABLE
1324                         ms_ucs_map_f = UCS_MAP_CP932;
1325 #endif
1326                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1327                         output_conv = j_oconv;
1328 #ifdef X0212_ENABLE
1329                         x0212_f = TRUE;
1330 #endif
1331 #ifdef SHIFTJIS_CP932
1332                         cp932inv_f = FALSE;
1333 #endif
1334                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1335                         output_conv = j_oconv;
1336 #ifdef X0212_ENABLE
1337                         x0212_f = TRUE;
1338 #endif
1339                         x0213_f = TRUE;
1340 #ifdef SHIFTJIS_CP932
1341                         cp932inv_f = FALSE;
1342 #endif
1343                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1344                         output_conv = s_oconv;
1345                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1346                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1347                              strcmp(codeset, "CP932") == 0 ||
1348                              strcmp(codeset, "MS932") == 0){
1349                         output_conv = s_oconv;
1350 #ifdef UTF8_OUTPUT_ENABLE
1351                         ms_ucs_map_f = UCS_MAP_CP932;
1352 #endif
1353                     }else if(strcmp(codeset, "CP10001") == 0){
1354                         output_conv = s_oconv;
1355 #ifdef UTF8_OUTPUT_ENABLE
1356                         ms_ucs_map_f = UCS_MAP_CP10001;
1357 #endif
1358                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1359                              strcmp(codeset, "EUC-JP") == 0){
1360                         output_conv = e_oconv;
1361                     }else if(strcmp(codeset, "CP51932") == 0){
1362                         output_conv = e_oconv;
1363 #ifdef SHIFTJIS_CP932
1364                         cp932inv_f = FALSE;
1365 #endif
1366 #ifdef UTF8_OUTPUT_ENABLE
1367                         ms_ucs_map_f = UCS_MAP_CP932;
1368 #endif
1369                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1370                              strcmp(codeset, "EUCJP-MS") == 0 ||
1371                              strcmp(codeset, "EUCJPMS") == 0){
1372                         output_conv = e_oconv;
1373 #ifdef X0212_ENABLE
1374                         x0212_f = TRUE;
1375 #endif
1376 #ifdef UTF8_OUTPUT_ENABLE
1377                         ms_ucs_map_f = UCS_MAP_MS;
1378 #endif
1379                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1380                              strcmp(codeset, "EUCJP-ASCII") == 0){
1381                         output_conv = e_oconv;
1382 #ifdef X0212_ENABLE
1383                         x0212_f = TRUE;
1384 #endif
1385 #ifdef UTF8_OUTPUT_ENABLE
1386                         ms_ucs_map_f = UCS_MAP_ASCII;
1387 #endif
1388                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1389                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1390                         output_conv = s_oconv;
1391                         x0213_f = TRUE;
1392 #ifdef SHIFTJIS_CP932
1393                         cp932inv_f = FALSE;
1394 #endif
1395                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1396                              strcmp(codeset, "EUC-JIS-2004") == 0){
1397                         output_conv = e_oconv;
1398 #ifdef X0212_ENABLE
1399                         x0212_f = TRUE;
1400 #endif
1401                         x0213_f = TRUE;
1402 #ifdef SHIFTJIS_CP932
1403                         cp932inv_f = FALSE;
1404 #endif
1405 #ifdef UTF8_OUTPUT_ENABLE
1406                     }else if(strcmp(codeset, "UTF-8") == 0){
1407                         output_conv = w_oconv;
1408                     }else if(strcmp(codeset, "UTF-8N") == 0){
1409                         output_conv = w_oconv;
1410                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1411                         output_conv = w_oconv;
1412                         output_bom_f = TRUE;
1413                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1414                         output_conv = w_oconv16;
1415                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1416                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1417                         output_conv = w_oconv16;
1418                         output_bom_f = TRUE;
1419                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1420                         output_conv = w_oconv16;
1421                         output_endian = ENDIAN_LITTLE;
1422                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1423                         output_conv = w_oconv16;
1424                         output_endian = ENDIAN_LITTLE;
1425                         output_bom_f = TRUE;
1426                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1427                              strcmp(codeset, "UTF-32BE") == 0){
1428                         output_conv = w_oconv32;
1429                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1430                         output_conv = w_oconv32;
1431                         output_bom_f = TRUE;
1432                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1433                         output_conv = w_oconv32;
1434                         output_endian = ENDIAN_LITTLE;
1435                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1436                         output_conv = w_oconv32;
1437                         output_endian = ENDIAN_LITTLE;
1438                         output_bom_f = TRUE;
1439 #endif
1440                     } else {
1441                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1442                     }
1443                     continue;
1444                 }
1445                 if (strcmp(long_option[i].name, "guess=") == 0){
1446                     if (p[0] == '1') {
1447                         guess_f = 2;
1448                     } else {
1449                         guess_f = 1;
1450                     }
1451                     continue;
1452                 }
1453 #ifdef OVERWRITE
1454                 if (strcmp(long_option[i].name, "overwrite") == 0){
1455                     file_out_f = TRUE;
1456                     overwrite_f = TRUE;
1457                     preserve_time_f = TRUE;
1458                     continue;
1459                 }
1460                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1461                     file_out_f = TRUE;
1462                     overwrite_f = TRUE;
1463                     preserve_time_f = TRUE;
1464                     backup_f = TRUE;
1465                     backup_suffix = malloc(strlen((char *) p) + 1);
1466                     strcpy(backup_suffix, (char *) p);
1467                     continue;
1468                 }
1469                 if (strcmp(long_option[i].name, "in-place") == 0){
1470                     file_out_f = TRUE;
1471                     overwrite_f = TRUE;
1472                     preserve_time_f = FALSE;
1473                     continue;
1474                 }
1475                 if (strcmp(long_option[i].name, "in-place=") == 0){
1476                     file_out_f = TRUE;
1477                     overwrite_f = TRUE;
1478                     preserve_time_f = FALSE;
1479                     backup_f = TRUE;
1480                     backup_suffix = malloc(strlen((char *) p) + 1);
1481                     strcpy(backup_suffix, (char *) p);
1482                     continue;
1483                 }
1484 #endif
1485 #ifdef INPUT_OPTION
1486                 if (strcmp(long_option[i].name, "cap-input") == 0){
1487                     cap_f = TRUE;
1488                     continue;
1489                 }
1490                 if (strcmp(long_option[i].name, "url-input") == 0){
1491                     url_f = TRUE;
1492                     continue;
1493                 }
1494 #endif
1495 #ifdef NUMCHAR_OPTION
1496                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1497                     numchar_f = TRUE;
1498                     continue;
1499                 }
1500 #endif
1501 #ifdef CHECK_OPTION
1502                 if (strcmp(long_option[i].name, "no-output") == 0){
1503                     noout_f = TRUE;
1504                     continue;
1505                 }
1506                 if (strcmp(long_option[i].name, "debug") == 0){
1507                     debug_f = TRUE;
1508                     continue;
1509                 }
1510 #endif
1511                 if (strcmp(long_option[i].name, "cp932") == 0){
1512 #ifdef SHIFTJIS_CP932
1513                     cp51932_f = TRUE;
1514                     cp932inv_f = TRUE;
1515 #endif
1516 #ifdef UTF8_OUTPUT_ENABLE
1517                     ms_ucs_map_f = UCS_MAP_CP932;
1518 #endif
1519                     continue;
1520                 }
1521                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1522 #ifdef SHIFTJIS_CP932
1523                     cp51932_f = FALSE;
1524                     cp932inv_f = FALSE;
1525 #endif
1526 #ifdef UTF8_OUTPUT_ENABLE
1527                     ms_ucs_map_f = UCS_MAP_ASCII;
1528 #endif
1529                     continue;
1530                 }
1531 #ifdef SHIFTJIS_CP932
1532                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1533                     cp932inv_f = TRUE;
1534                     continue;
1535                 }
1536 #endif
1537
1538 #ifdef X0212_ENABLE
1539                 if (strcmp(long_option[i].name, "x0212") == 0){
1540                     x0212_f = TRUE;
1541                     continue;
1542                 }
1543 #endif
1544
1545 #ifdef EXEC_IO
1546                   if (strcmp(long_option[i].name, "exec-in") == 0){
1547                       exec_f = 1;
1548                       return;
1549                   }
1550                   if (strcmp(long_option[i].name, "exec-out") == 0){
1551                       exec_f = -1;
1552                       return;
1553                   }
1554 #endif
1555 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1556                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1557                     no_cp932ext_f = TRUE;
1558                     continue;
1559                 }
1560                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1561                     no_best_fit_chars_f = TRUE;
1562                     continue;
1563                 }
1564                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1565                     encode_fallback = NULL;
1566                     continue;
1567                 }
1568                 if (strcmp(long_option[i].name, "fb-html") == 0){
1569                     encode_fallback = encode_fallback_html;
1570                     continue;
1571                 }
1572                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1573                     encode_fallback = encode_fallback_xml;
1574                     continue;
1575                 }
1576                 if (strcmp(long_option[i].name, "fb-java") == 0){
1577                     encode_fallback = encode_fallback_java;
1578                     continue;
1579                 }
1580                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1581                     encode_fallback = encode_fallback_perl;
1582                     continue;
1583                 }
1584                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1585                     encode_fallback = encode_fallback_subchar;
1586                     continue;
1587                 }
1588                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1589                     encode_fallback = encode_fallback_subchar;
1590                     unicode_subchar = 0;
1591                     if (p[0] != '0'){
1592                         /* decimal number */
1593                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1594                             unicode_subchar *= 10;
1595                             unicode_subchar += hex2bin(p[i]);
1596                         }
1597                     }else if(p[1] == 'x' || p[1] == 'X'){
1598                         /* hexadecimal number */
1599                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1600                             unicode_subchar <<= 4;
1601                             unicode_subchar |= hex2bin(p[i]);
1602                         }
1603                     }else{
1604                         /* octal number */
1605                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1606                             unicode_subchar *= 8;
1607                             unicode_subchar += hex2bin(p[i]);
1608                         }
1609                     }
1610                     w16e_conv(unicode_subchar, &i, &j);
1611                     unicode_subchar = i<<8 | j;
1612                     continue;
1613                 }
1614 #endif
1615 #ifdef UTF8_OUTPUT_ENABLE
1616                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1617                     ms_ucs_map_f = UCS_MAP_MS;
1618                     continue;
1619                 }
1620 #endif
1621 #ifdef UNICODE_NORMALIZATION
1622                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1623                     input_f = UTF8_INPUT;
1624                     nfc_f = TRUE;
1625                     continue;
1626                 }
1627 #endif
1628                 if (strcmp(long_option[i].name, "prefix=") == 0){
1629                     if (nkf_isgraph(p[0])){
1630                         for (i = 1; nkf_isgraph(p[i]); i++){
1631                             prefix_table[p[i]] = p[0];
1632                         }
1633                     }
1634                     continue;
1635                 }
1636             }
1637             continue;
1638         case 'b':           /* buffered mode */
1639             unbuf_f = FALSE;
1640             continue;
1641         case 'u':           /* non bufferd mode */
1642             unbuf_f = TRUE;
1643             continue;
1644         case 't':           /* transparent mode */
1645             if (*cp=='1') {
1646                 /* alias of -t */
1647                 nop_f = TRUE;
1648                 *cp++;
1649             } else if (*cp=='2') {
1650                 /*
1651                  * -t with put/get
1652                  *
1653                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1654                  *
1655                  */
1656                 nop_f = 2;
1657                 *cp++;
1658             } else
1659                 nop_f = TRUE;
1660             continue;
1661         case 'j':           /* JIS output */
1662         case 'n':
1663             output_conv = j_oconv;
1664             continue;
1665         case 'e':           /* AT&T EUC output */
1666             output_conv = e_oconv;
1667             cp932inv_f = FALSE;
1668             continue;
1669         case 's':           /* SJIS output */
1670             output_conv = s_oconv;
1671             continue;
1672         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1673             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1674             input_f = LATIN1_INPUT;
1675             continue;
1676         case 'i':           /* Kanji IN ESC-$-@/B */
1677             if (*cp=='@'||*cp=='B')
1678                 kanji_intro = *cp++;
1679             continue;
1680         case 'o':           /* ASCII IN ESC-(-J/B */
1681             if (*cp=='J'||*cp=='B'||*cp=='H')
1682                 ascii_intro = *cp++;
1683             continue;
1684         case 'h':
1685             /*
1686                 bit:1   katakana->hiragana
1687                 bit:2   hiragana->katakana
1688             */
1689             if ('9'>= *cp && *cp>='0')
1690                 hira_f |= (*cp++ -'0');
1691             else
1692                 hira_f |= 1;
1693             continue;
1694         case 'r':
1695             rot_f = TRUE;
1696             continue;
1697 #if defined(MSDOS) || defined(__OS2__)
1698         case 'T':
1699             binmode_f = FALSE;
1700             continue;
1701 #endif
1702 #ifndef PERL_XS
1703         case 'V':
1704             show_configuration();
1705             exit(1);
1706             break;
1707         case 'v':
1708             usage();
1709             exit(1);
1710             break;
1711 #endif
1712 #ifdef UTF8_OUTPUT_ENABLE
1713         case 'w':           /* UTF-8 output */
1714             if (cp[0] == '8') {
1715                 output_conv = w_oconv; cp++;
1716                 if (cp[0] == '0'){
1717                     cp++;
1718                 } else {
1719                     output_bom_f = TRUE;
1720                 }
1721             } else {
1722                 if ('1'== cp[0] && '6'==cp[1]) {
1723                     output_conv = w_oconv16; cp+=2;
1724                 } else if ('3'== cp[0] && '2'==cp[1]) {
1725                     output_conv = w_oconv32; cp+=2;
1726                 } else {
1727                     output_conv = w_oconv;
1728                     continue;
1729                 }
1730                 if (cp[0]=='L') {
1731                     cp++;
1732                     output_endian = ENDIAN_LITTLE;
1733                 } else if (cp[0] == 'B') {
1734                     cp++;
1735                 } else {
1736                     continue;
1737                 }
1738                 if (cp[0] == '0'){
1739                     cp++;
1740                 } else {
1741                     output_bom_f = TRUE;
1742                 }
1743             }
1744             continue;
1745 #endif
1746 #ifdef UTF8_INPUT_ENABLE
1747         case 'W':           /* UTF input */
1748             if (cp[0] == '8') {
1749                 cp++;
1750                 input_f = UTF8_INPUT;
1751             }else{
1752                 if ('1'== cp[0] && '6'==cp[1]) {
1753                     cp += 2;
1754                     input_f = UTF16_INPUT;
1755                     input_endian = ENDIAN_BIG;
1756                 } else if ('3'== cp[0] && '2'==cp[1]) {
1757                     cp += 2;
1758                     input_f = UTF32_INPUT;
1759                     input_endian = ENDIAN_BIG;
1760                 } else {
1761                     input_f = UTF8_INPUT;
1762                     continue;
1763                 }
1764                 if (cp[0]=='L') {
1765                     cp++;
1766                     input_endian = ENDIAN_LITTLE;
1767                 } else if (cp[0] == 'B') {
1768                     cp++;
1769                 }
1770             }
1771             continue;
1772 #endif
1773         /* Input code assumption */
1774         case 'J':   /* JIS input */
1775             input_f = JIS_INPUT;
1776             continue;
1777         case 'E':   /* AT&T EUC input */
1778             input_f = EUC_INPUT;
1779             continue;
1780         case 'S':   /* MS Kanji input */
1781             input_f = SJIS_INPUT;
1782             continue;
1783         case 'Z':   /* Convert X0208 alphabet to asii */
1784             /* alpha_f
1785                bit:0   Convert JIS X 0208 Alphabet to ASCII
1786                bit:1   Convert Kankaku to one space
1787                bit:2   Convert Kankaku to two spaces
1788                bit:3   Convert HTML Entity
1789                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1790             */
1791             while ('0'<= *cp && *cp <='9') {
1792                 alpha_f |= 1 << (*cp++ - '0');
1793             }
1794             if (!alpha_f) alpha_f = 1;
1795             continue;
1796         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1797             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1798             /* accept  X0201
1799                     ESC-(-I     in JIS, EUC, MS Kanji
1800                     SI/SO       in JIS, EUC, MS Kanji
1801                     SSO         in EUC, JIS, not in MS Kanji
1802                     MS Kanji (0xa0-0xdf)
1803                output  X0201
1804                     ESC-(-I     in JIS (0x20-0x5f)
1805                     SSO         in EUC (0xa0-0xdf)
1806                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1807             */
1808             continue;
1809         case 'X':   /* Convert X0201 kana to X0208 */
1810             x0201_f = TRUE;
1811             continue;
1812         case 'F':   /* prserve new lines */
1813             fold_preserve_f = TRUE;
1814         case 'f':   /* folding -f60 or -f */
1815             fold_f = TRUE;
1816             fold_len = 0;
1817             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1818                 fold_len *= 10;
1819                 fold_len += *cp++ - '0';
1820             }
1821             if (!(0<fold_len && fold_len<BUFSIZ))
1822                 fold_len = DEFAULT_FOLD;
1823             if (*cp=='-') {
1824                 fold_margin = 0;
1825                 cp++;
1826                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1827                     fold_margin *= 10;
1828                     fold_margin += *cp++ - '0';
1829                 }
1830             }
1831             continue;
1832         case 'm':   /* MIME support */
1833             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1834             if (*cp=='B'||*cp=='Q') {
1835                 mime_decode_mode = *cp++;
1836                 mimebuf_f = FIXED_MIME;
1837             } else if (*cp=='N') {
1838                 mime_f = TRUE; cp++;
1839             } else if (*cp=='S') {
1840                 mime_f = STRICT_MIME; cp++;
1841             } else if (*cp=='0') {
1842                 mime_decode_f = FALSE;
1843                 mime_f = FALSE; cp++;
1844             }
1845             continue;
1846         case 'M':   /* MIME output */
1847             if (*cp=='B') {
1848                 mimeout_mode = 'B';
1849                 mimeout_f = FIXED_MIME; cp++;
1850             } else if (*cp=='Q') {
1851                 mimeout_mode = 'Q';
1852                 mimeout_f = FIXED_MIME; cp++;
1853             } else {
1854                 mimeout_f = TRUE;
1855             }
1856             continue;
1857         case 'B':   /* Broken JIS support */
1858             /*  bit:0   no ESC JIS
1859                 bit:1   allow any x on ESC-(-x or ESC-$-x
1860                 bit:2   reset to ascii on NL
1861             */
1862             if ('9'>= *cp && *cp>='0')
1863                 broken_f |= 1<<(*cp++ -'0');
1864             else
1865                 broken_f |= TRUE;
1866             continue;
1867 #ifndef PERL_XS
1868         case 'O':/* for Output file */
1869             file_out_f = TRUE;
1870             continue;
1871 #endif
1872         case 'c':/* add cr code */
1873             nlmode_f = CRLF;
1874             continue;
1875         case 'd':/* delete cr code */
1876             nlmode_f = LF;
1877             continue;
1878         case 'I':   /* ISO-2022-JP output */
1879             iso2022jp_f = TRUE;
1880             continue;
1881         case 'L':  /* line mode */
1882             if (*cp=='u') {         /* unix */
1883                 nlmode_f = LF; cp++;
1884             } else if (*cp=='m') { /* mac */
1885                 nlmode_f = CR; cp++;
1886             } else if (*cp=='w') { /* windows */
1887                 nlmode_f = CRLF; cp++;
1888             } else if (*cp=='0') { /* no conversion  */
1889                 nlmode_f = 0; cp++;
1890             }
1891             continue;
1892 #ifndef PERL_XS
1893         case 'g':
1894             if (*cp == '1') {
1895                 guess_f = 2;
1896                 cp++;
1897             } else if (*cp == '0') {
1898                 guess_f = 1;
1899                 cp++;
1900             } else {
1901                 guess_f = 1;
1902             }
1903             continue;
1904 #endif
1905         case SP:
1906         /* module muliple options in a string are allowed for Perl moudle  */
1907             while(*cp && *cp++!='-');
1908             continue;
1909         default:
1910             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1911             /* bogus option but ignored */
1912             continue;
1913         }
1914     }
1915 }
1916
1917 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1918 {
1919     if (iconv_func){
1920         struct input_code *p = input_code_list;
1921         while (p->name){
1922             if (iconv_func == p->iconv_func){
1923                 return p;
1924             }
1925             p++;
1926         }
1927     }
1928     return 0;
1929 }
1930
1931 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1932 {
1933 #ifdef INPUT_CODE_FIX
1934     if (f || !input_f)
1935 #endif
1936         if (estab_f != f){
1937             estab_f = f;
1938         }
1939
1940     if (iconv_func
1941 #ifdef INPUT_CODE_FIX
1942         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1943 #endif
1944         ){
1945         iconv = iconv_func;
1946     }
1947 #ifdef CHECK_OPTION
1948     if (estab_f && iconv_for_check != iconv){
1949         struct input_code *p = find_inputcode_byfunc(iconv);
1950         if (p){
1951             set_input_codename(p->name);
1952             debug(p->name);
1953         }
1954         iconv_for_check = iconv;
1955     }
1956 #endif
1957 }
1958
/*
 * Penalty bits OR-ed into input_code.score by set_code_score().
 * Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* CP932 remapping (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* characters that do not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score for a first byte with (c2 & 0x70) == 0x20, indexed by (c2 & 0x0f).
 *
 * The element type is unsigned char: SCORE_ERROR is 128, which does not fit
 * in a signed char and would read back as a negative value (setting every
 * high bit when OR-ed into a score) on platforms where plain char is signed.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/* Score for a first byte with (c2 & 0x70) == 0x70, indexed by (c2 & 0x0f). */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1983
1984 void set_code_score(struct input_code *ptr, nkf_char score)
1985 {
1986     if (ptr){
1987         ptr->score |= score;
1988     }
1989 }
1990
1991 void clr_code_score(struct input_code *ptr, nkf_char score)
1992 {
1993     if (ptr){
1994         ptr->score &= ~score;
1995     }
1996 }
1997
1998 void code_score(struct input_code *ptr)
1999 {
2000     nkf_char c2 = ptr->buf[0];
2001 #ifdef UTF8_OUTPUT_ENABLE
2002     nkf_char c1 = ptr->buf[1];
2003 #endif
2004     if (c2 < 0){
2005         set_code_score(ptr, SCORE_ERROR);
2006     }else if (c2 == SSO){
2007         set_code_score(ptr, SCORE_KANA);
2008     }else if (c2 == 0x8f){
2009         set_code_score(ptr, SCORE_X0212);
2010 #ifdef UTF8_OUTPUT_ENABLE
2011     }else if (!e2w_conv(c2, c1)){
2012         set_code_score(ptr, SCORE_NO_EXIST);
2013 #endif
2014     }else if ((c2 & 0x70) == 0x20){
2015         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2016     }else if ((c2 & 0x70) == 0x70){
2017         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2018     }else if ((c2 & 0x70) >= 0x50){
2019         set_code_score(ptr, SCORE_L2);
2020     }
2021 }
2022
2023 void status_disable(struct input_code *ptr)
2024 {
2025     ptr->stat = -1;
2026     ptr->buf[0] = -1;
2027     code_score(ptr);
2028     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2029 }
2030
2031 void status_push_ch(struct input_code *ptr, nkf_char c)
2032 {
2033     ptr->buf[ptr->index++] = c;
2034 }
2035
2036 void status_clear(struct input_code *ptr)
2037 {
2038     ptr->stat = 0;
2039     ptr->index = 0;
2040 }
2041
2042 void status_reset(struct input_code *ptr)
2043 {
2044     status_clear(ptr);
2045     ptr->score = SCORE_INIT;
2046 }
2047
2048 void status_reinit(struct input_code *ptr)
2049 {
2050     status_reset(ptr);
2051     ptr->_file_stat = 0;
2052 }
2053
2054 void status_check(struct input_code *ptr, nkf_char c)
2055 {
2056     if (c <= DEL && estab_f){
2057         status_reset(ptr);
2058     }
2059 }
2060
2061 void s_status(struct input_code *ptr, nkf_char c)
2062 {
2063     switch(ptr->stat){
2064       case -1:
2065           status_check(ptr, c);
2066           break;
2067       case 0:
2068           if (c <= DEL){
2069               break;
2070 #ifdef NUMCHAR_OPTION
2071           }else if (is_unicode_capsule(c)){
2072               break;
2073 #endif
2074           }else if (0xa1 <= c && c <= 0xdf){
2075               status_push_ch(ptr, SSO);
2076               status_push_ch(ptr, c);
2077               code_score(ptr);
2078               status_clear(ptr);
2079           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2080               ptr->stat = 1;
2081               status_push_ch(ptr, c);
2082           }else if (0xed <= c && c <= 0xee){
2083               ptr->stat = 3;
2084               status_push_ch(ptr, c);
2085 #ifdef SHIFTJIS_CP932
2086           }else if (is_ibmext_in_sjis(c)){
2087               ptr->stat = 2;
2088               status_push_ch(ptr, c);
2089 #endif /* SHIFTJIS_CP932 */
2090 #ifdef X0212_ENABLE
2091           }else if (0xf0 <= c && c <= 0xfc){
2092               ptr->stat = 1;
2093               status_push_ch(ptr, c);
2094 #endif /* X0212_ENABLE */
2095           }else{
2096               status_disable(ptr);
2097           }
2098           break;
2099       case 1:
2100           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2101               status_push_ch(ptr, c);
2102               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2103               code_score(ptr);
2104               status_clear(ptr);
2105           }else{
2106               status_disable(ptr);
2107           }
2108           break;
2109       case 2:
2110 #ifdef SHIFTJIS_CP932
2111         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2112             status_push_ch(ptr, c);
2113             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2114                 set_code_score(ptr, SCORE_CP932);
2115                 status_clear(ptr);
2116                 break;
2117             }
2118         }
2119 #endif /* SHIFTJIS_CP932 */
2120         status_disable(ptr);
2121           break;
2122       case 3:
2123           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2124               status_push_ch(ptr, c);
2125               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2126             set_code_score(ptr, SCORE_CP932);
2127             status_clear(ptr);
2128           }else{
2129               status_disable(ptr);
2130           }
2131           break;
2132     }
2133 }
2134
2135 void e_status(struct input_code *ptr, nkf_char c)
2136 {
2137     switch (ptr->stat){
2138       case -1:
2139           status_check(ptr, c);
2140           break;
2141       case 0:
2142           if (c <= DEL){
2143               break;
2144 #ifdef NUMCHAR_OPTION
2145           }else if (is_unicode_capsule(c)){
2146               break;
2147 #endif
2148           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2149               ptr->stat = 1;
2150               status_push_ch(ptr, c);
2151 #ifdef X0212_ENABLE
2152           }else if (0x8f == c){
2153               ptr->stat = 2;
2154               status_push_ch(ptr, c);
2155 #endif /* X0212_ENABLE */
2156           }else{
2157               status_disable(ptr);
2158           }
2159           break;
2160       case 1:
2161           if (0xa1 <= c && c <= 0xfe){
2162               status_push_ch(ptr, c);
2163               code_score(ptr);
2164               status_clear(ptr);
2165           }else{
2166               status_disable(ptr);
2167           }
2168           break;
2169 #ifdef X0212_ENABLE
2170       case 2:
2171           if (0xa1 <= c && c <= 0xfe){
2172               ptr->stat = 1;
2173               status_push_ch(ptr, c);
2174           }else{
2175               status_disable(ptr);
2176           }
2177 #endif /* X0212_ENABLE */
2178     }
2179 }
2180
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: advance ptr's state machine by one input byte c.
 *
 * stat -1: disabled; 0: idle; 1/2/3: inside a sequence whose lead byte was
 * 0xc0-0xdf / 0xe0-0xef / 0xf0-0xf4 respectively, i.e. stat is the number
 * of continuation bytes expected.  Sequences of stat 1 or 2 are converted
 * with w2e_conv() and scored (the EF BB BF sequence is not scored);
 * stat 3 sequences are consumed without scoring.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    const int is_cont = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat){
    case -1:
        status_check(ptr, c);
        break;

    case 0:
        if (c <= DEL){
            break;
        }
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c)){
            break;
        }
#endif
        if (0xc0 <= c && c <= 0xdf){
            ptr->stat = 1;
        }else if (0xe0 <= c && c <= 0xef){
            ptr->stat = 2;
        }else if (0xf0 <= c && c <= 0xf4){
            ptr->stat = 3;
        }else{
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        break;

    case 1:
    case 2:
        if (!is_cont){
            status_disable(ptr);
            break;
        }
        status_push_ch(ptr, c);
        if (ptr->index <= ptr->stat){
            /* more continuation bytes still expected */
            break;
        }
        {
            int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                       && ptr->buf[2] == 0xbf);
            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!bom){
                code_score(ptr);
            }
            status_clear(ptr);
        }
        break;

    case 3:
        if (!is_cont){
            status_disable(ptr);
            break;
        }
        if (ptr->index < ptr->stat){
            status_push_ch(ptr, c);
        }else{
            /* last continuation byte: sequence complete */
            status_clear(ptr);
        }
        break;
    }
}
#endif
2240
2241 void code_status(nkf_char c)
2242 {
2243     int action_flag = 1;
2244     struct input_code *result = 0;
2245     struct input_code *p = input_code_list;
2246     while (p->name){
2247         if (!p->status_func) {
2248             ++p;
2249             continue;
2250         }
2251         if (!p->status_func)
2252             continue;
2253         (p->status_func)(p, c);
2254         if (p->stat > 0){
2255             action_flag = 0;
2256         }else if(p->stat == 0){
2257             if (result){
2258                 action_flag = 0;
2259             }else{
2260                 result = p;
2261             }
2262         }
2263         ++p;
2264     }
2265
2266     if (action_flag){
2267         if (result && !estab_f){
2268             set_iconv(TRUE, result->iconv_func);
2269         }else if (c <= DEL){
2270             struct input_code *ptr = input_code_list;
2271             while (ptr->name){
2272                 status_reset(ptr);
2273                 ++ptr;
2274             }
2275         }
2276     }
2277 }
2278
2279 #ifndef WIN32DLL
2280 nkf_char std_getc(FILE *f)
2281 {
2282     if (std_gc_ndx){
2283         return std_gc_buf[--std_gc_ndx];
2284     }
2285     return getc(f);
2286 }
2287 #endif /*WIN32DLL*/
2288
2289 nkf_char std_ungetc(nkf_char c, FILE *f)
2290 {
2291     if (std_gc_ndx == STD_GC_BUFSIZE){
2292         return EOF;
2293     }
2294     std_gc_buf[std_gc_ndx++] = c;
2295     return c;
2296 }
2297
2298 #ifndef WIN32DLL
2299 void std_putc(nkf_char c)
2300 {
2301     if(c!=EOF)
2302       putchar(c);
2303 }
2304 #endif /*WIN32DLL*/
2305
2306 #if !defined(PERL_XS) && !defined(WIN32DLL)
2307 nkf_char noconvert(FILE *f)
2308 {
2309     nkf_char    c;
2310
2311     if (nop_f == 2)
2312         module_connection();
2313     while ((c = (*i_getc)(f)) != EOF)
2314       (*o_putc)(c);
2315     (*o_putc)(EOF);
2316     return 1;
2317 }
2318 #endif
2319
/*
 * Build the conversion pipeline from the current option flags.
 *
 * Output side: starting from output_conv / std_putc, each enabled filter
 * saves the current oconv (or o_putc) in its own o_* pointer and installs
 * itself, so the filter installed last is the one oconv finally points at.
 * Input side: the same pattern is applied to i_getc / i_ungetc.
 * Finally the input converter is chosen from input_f and every detector in
 * input_code_list is reinitialized.
 */
void module_connection(void)
{
    oconv = output_conv;
    o_putc = std_putc;

    /* replace continuation module, from output side */

    /* output redirection */
#ifdef CHECK_OPTION
    if (noout_f || guess_f){
        o_putc = no_putc;
    }
#endif
    if (mimeout_f) {
        o_mputc = o_putc;
        o_putc = mime_putc;
        /* base64_conv is only inserted when mimeout_f is exactly TRUE */
        if (mimeout_f == TRUE) {
            o_base64conv = oconv; oconv = base64_conv;
        }
        /* base64_count = 0; */
    }

    if (nlmode_f || guess_f) {
        o_nlconv = oconv; oconv = nl_conv;
    }
    if (rot_f) {
        o_rot_conv = oconv; oconv = rot_conv;
    }
    if (iso2022jp_f) {
        o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
    }
    if (hira_f) {
        o_hira_conv = oconv; oconv = hira_conv;
    }
    if (fold_f) {
        o_fconv = oconv; oconv = fold_conv;
        f_line = 0;
    }
    if (alpha_f || x0201_f) {
        o_zconv = oconv; oconv = z_conv;
    }

    i_getc = std_getc;
    i_ungetc = std_ungetc;
    /* input redirection */
#ifdef INPUT_OPTION
    if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
        i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
    }
    if (url_f){
        i_ugetc = i_getc; i_getc = url_getc;
        i_uungetc = i_ungetc; i_ungetc= url_ungetc;
    }
#endif
#ifdef NUMCHAR_OPTION
    if (numchar_f){
        i_ngetc = i_getc; i_getc = numchar_getc;
        i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
    }
#endif
#ifdef UNICODE_NORMALIZATION
    if (nfc_f && input_f == UTF8_INPUT){
        i_nfc_getc = i_getc; i_getc = nfc_getc;
        i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
    }
#endif
    if (mime_f && mimebuf_f==FIXED_MIME) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
    }
    if (broken_f & 1) {
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
    }
    /* An explicit input_f forces its converter (-TRUE); otherwise e_iconv
       is installed provisionally with estab_f cleared. */
    if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
        set_iconv(-TRUE, e_iconv);
    } else if (input_f == SJIS_INPUT) {
        set_iconv(-TRUE, s_iconv);
#ifdef UTF8_INPUT_ENABLE
    } else if (input_f == UTF8_INPUT) {
        set_iconv(-TRUE, w_iconv);
    } else if (input_f == UTF16_INPUT) {
        set_iconv(-TRUE, w_iconv16);
    } else if (input_f == UTF32_INPUT) {
        set_iconv(-TRUE, w_iconv32);
#endif
    } else {
        set_iconv(FALSE, e_iconv);
    }

    /* reinitialize every detector, up to the null-name terminator */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
}
2418
2419 /*
2420  * Check and Ignore BOM
2421  */
2422 void check_bom(FILE *f)
2423 {
2424     int c2;
2425     switch(c2 = (*i_getc)(f)){
2426     case 0x00:
2427         if((c2 = (*i_getc)(f)) == 0x00){
2428             if((c2 = (*i_getc)(f)) == 0xFE){
2429                 if((c2 = (*i_getc)(f)) == 0xFF){
2430                     if(!input_f){
2431                         set_iconv(TRUE, w_iconv32);
2432                     }
2433                     if (iconv == w_iconv32) {
2434                         input_endian = ENDIAN_BIG;
2435                         return;
2436                     }
2437                     (*i_ungetc)(0xFF,f);
2438                 }else (*i_ungetc)(c2,f);
2439                 (*i_ungetc)(0xFE,f);
2440             }else if(c2 == 0xFF){
2441                 if((c2 = (*i_getc)(f)) == 0xFE){
2442                     if(!input_f){
2443                         set_iconv(TRUE, w_iconv32);
2444                     }
2445                     if (iconv == w_iconv32) {
2446                         input_endian = ENDIAN_2143;
2447                         return;
2448                     }
2449                     (*i_ungetc)(0xFF,f);
2450                 }else (*i_ungetc)(c2,f);
2451                 (*i_ungetc)(0xFF,f);
2452             }else (*i_ungetc)(c2,f);
2453             (*i_ungetc)(0x00,f);
2454         }else (*i_ungetc)(c2,f);
2455         (*i_ungetc)(0x00,f);
2456         break;
2457     case 0xEF:
2458         if((c2 = (*i_getc)(f)) == 0xBB){
2459             if((c2 = (*i_getc)(f)) == 0xBF){
2460                 if(!input_f){
2461                     set_iconv(TRUE, w_iconv);
2462                 }
2463                 if (iconv == w_iconv) {
2464                     return;
2465                 }
2466                 (*i_ungetc)(0xBF,f);
2467             }else (*i_ungetc)(c2,f);
2468             (*i_ungetc)(0xBB,f);
2469         }else (*i_ungetc)(c2,f);
2470         (*i_ungetc)(0xEF,f);
2471         break;
2472     case 0xFE:
2473         if((c2 = (*i_getc)(f)) == 0xFF){
2474             if((c2 = (*i_getc)(f)) == 0x00){
2475                 if((c2 = (*i_getc)(f)) == 0x00){
2476                     if(!input_f){
2477                         set_iconv(TRUE, w_iconv32);
2478                     }
2479                     if (iconv == w_iconv32) {
2480                         input_endian = ENDIAN_3412;
2481                         return;
2482                     }
2483                     (*i_ungetc)(0x00,f);
2484                 }else (*i_ungetc)(c2,f);
2485                 (*i_ungetc)(0x00,f);
2486             }else (*i_ungetc)(c2,f);
2487             if(!input_f){
2488                 set_iconv(TRUE, w_iconv16);
2489             }
2490             if (iconv == w_iconv16) {
2491                 input_endian = ENDIAN_BIG;
2492                 return;
2493             }
2494             (*i_ungetc)(0xFF,f);
2495         }else (*i_ungetc)(c2,f);
2496         (*i_ungetc)(0xFE,f);
2497         break;
2498     case 0xFF:
2499         if((c2 = (*i_getc)(f)) == 0xFE){
2500             if((c2 = (*i_getc)(f)) == 0x00){
2501                 if((c2 = (*i_getc)(f)) == 0x00){
2502                     if(!input_f){
2503                         set_iconv(TRUE, w_iconv32);
2504                     }
2505                     if (iconv == w_iconv32) {
2506                         input_endian = ENDIAN_LITTLE;
2507                         return;
2508                     }
2509                     (*i_ungetc)(0x00,f);
2510                 }else (*i_ungetc)(c2,f);
2511                 (*i_ungetc)(0x00,f);
2512             }else (*i_ungetc)(c2,f);
2513             if(!input_f){
2514                 set_iconv(TRUE, w_iconv16);
2515             }
2516             if (iconv == w_iconv16) {
2517                 input_endian = ENDIAN_LITTLE;
2518                 return;
2519             }
2520             (*i_ungetc)(0xFE,f);
2521         }else (*i_ungetc)(c2,f);
2522         (*i_ungetc)(0xFF,f);
2523         break;
2524     default:
2525         (*i_ungetc)(c2,f);
2526         break;
2527     }
2528 }
2529
2530 /*
2531    Conversion main loop. Code detection only.
2532  */
2533
/*
 * kanji_convert - main conversion loop over the input stream f.
 *
 * Resets input_mode, output_mode and shift_mode, calls module_connection()
 * and check_bom(f), then reads through (*i_getc)(f) until EOF.  Every pass
 * of the loop ends in one of three ways, spelled with the macros defined
 * below:
 *   NEXT - nothing to emit yet, read the next byte;
 *   SEND - fall through to the dispatch switch at the bottom of the loop;
 *   LAST - leave the loop and run the epilogue.
 * The dispatch switch hands (c2, c1, c0) to (*iconv) while input_mode is
 * ASCII, and calls (*oconv) directly for the other input modes.
 *
 * After the loop (*iconv)(EOF, 0, 0) is called; if input_codename is still
 * unset and is_8bit is true, the input_code_list entry with the lowest score
 * is recorded through set_input_codename().
 *
 * Returns 1.  The only other return (0, in the X0208/X0213_1 dispatch case)
 * sits behind a test that the enclosing condition already excludes.
 */
2534 nkf_char kanji_convert(FILE *f)
2535 {
2536     nkf_char    c3, c2=0, c1, c0=0;
2537     int is_8bit = FALSE;
2538
    /* With one of these input encodings selected, SI, SO and ESC are only
       interpreted while mime_decode_mode is set (see the !is_8bit tests
       further down). */
2539     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2540 #ifdef UTF8_INPUT_ENABLE
2541        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2542 #endif
2543       ){
2544         is_8bit = TRUE;
2545     }
2546
2547     input_mode = ASCII;
2548     output_mode = ASCII;
2549     shift_mode = FALSE;
2550
    /* Loop-control shorthands used throughout the body below. */
2551 #define NEXT continue      /* no output, get next */
2552 #define SEND ;             /* output c1 and c2, get next */
2553 #define LAST break         /* end of loop, go closing  */
2554
2555     module_connection();
2556     check_bom(f);
2557
2558     while ((c1 = (*i_getc)(f)) != EOF) {
        /* Feed the detector; with INPUT_CODE_FIX only while input_f is zero. */
2559 #ifdef INPUT_CODE_FIX
2560         if (!input_f)
2561 #endif
2562             code_status(c1);
2563         if (c2) {
2564             /* second byte */
2565             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2566                 /* in case of 8th bit is on */
2567                 if (!estab_f&&!mime_decode_mode) {
2568                     /* in case of not established yet */
2569                     /* It is still ambiguous */
2570                     if (h_conv(f, c2, c1)==EOF)
2571                         LAST;
2572                     else
2573                         c2 = 0;
2574                     NEXT;
2575                 } else {
2576                     /* in case of already established */
2577                     if (c1 < AT) {
2578                         /* ignore bogus code and not CP5022x UCD */
2579                         c2 = 0;
2580                         NEXT;
2581                     } else {
2582                         SEND;
2583                     }
2584                 }
2585             } else
2586                 /* second byte, 7 bit code */
2587                 /* it might be kanji shifted */
2588                 if ((c1 == DEL) || (c1 <= SP)) {
2589                     /* ignore bogus first code */
2590                     c2 = 0;
2591                     NEXT;
2592                 } else
2593                     SEND;
2594         } else {
2595             /* first byte */
2596 #ifdef UTF8_INPUT_ENABLE
            /* UTF-16: c2 receives the high byte and c1 the low byte of the
               16-bit unit.  When c2 is in 0xD8..0xDB two more bytes are read
               and packed into c0 as (high << 8) | low.  If the stream ends
               inside a unit, c2 is set to EOF before falling through to the
               dispatch switch. */
2597             if (iconv == w_iconv16) {
2598                 if (input_endian == ENDIAN_BIG) {
2599                     c2 = c1;
2600                     if ((c1 = (*i_getc)(f)) != EOF) {
2601                         if (0xD8 <= c2 && c2 <= 0xDB) {
2602                             if ((c0 = (*i_getc)(f)) != EOF) {
2603                                 c0 <<= 8;
2604                                 if ((c3 = (*i_getc)(f)) != EOF) {
2605                                     c0 |= c3;
2606                                 } else c2 = EOF;
2607                             } else c2 = EOF;
2608                         }
2609                     } else c2 = EOF;
2610                 } else {
2611                     if ((c2 = (*i_getc)(f)) != EOF) {
2612                         if (0xD8 <= c2 && c2 <= 0xDB) {
2613                             if ((c3 = (*i_getc)(f)) != EOF) {
2614                                 if ((c0 = (*i_getc)(f)) != EOF) {
2615                                     c0 <<= 8;
2616                                     c0 |= c3;
2617                                 } else c2 = EOF;
2618                             } else c2 = EOF;
2619                         }
2620                     } else c2 = EOF;
2621                 }
2622                 SEND;
2623             } else if(iconv == w_iconv32){
                /* UTF-32: this c3 shadows the function-scope c3 and keeps the
                   first of the four bytes; c2, c1, c0 receive the next three.
                   The bytes are then combined into c1 according to
                   input_endian and c2 is cleared. */
2624                 int c3 = c1;
2625                 if((c2 = (*i_getc)(f)) != EOF &&
2626                    (c1 = (*i_getc)(f)) != EOF &&
2627                    (c0 = (*i_getc)(f)) != EOF){
2628                     switch(input_endian){
2629                     case ENDIAN_BIG:
2630                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2631                         break;
2632                     case ENDIAN_LITTLE:
2633                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2634                         break;
2635                     case ENDIAN_2143:
2636                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2637                         break;
2638                     case ENDIAN_3412:
2639                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2640                         break;
2641                     }
2642                     c2 = 0;
2643                 }else{
2644                     c2 = EOF;
2645                 }
2646                 SEND;
2647             } else
2648 #endif
2649 #ifdef NUMCHAR_OPTION
2650             if (is_unicode_capsule(c1)){
2651                 SEND;
2652             } else
2653 #endif
2654             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2655                 /* 8 bit code */
2656                 if (!estab_f && !iso8859_f) {
2657                     /* not established yet */
2658                     c2 = c1;
2659                     NEXT;
2660                 } else { /* estab_f==TRUE */
2661                     if (iso8859_f) {
2662                         c2 = ISO8859_1;
2663                         c1 &= 0x7f;
2664                         SEND;
2665                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2666                         /* SJIS X0201 Case... */
2667                         if (iso2022jp_f && !x0201_f) {
2668                             (*oconv)(GETA1, GETA2);
2669                             NEXT;
2670                         } else {
2671                             c2 = X0201;
2672                             c1 &= 0x7f;
2673                             SEND;
2674                         }
2675                     } else if (c1==SSO && iconv != s_iconv) {
2676                         /* EUC X0201 Case */
2677                         c1 = (*i_getc)(f);  /* skip SSO */
2678                         code_status(c1);
2679                         if (SSP<=c1 && c1<0xe0) {
2680                             if (iso2022jp_f && !x0201_f) {
2681                                 (*oconv)(GETA1, GETA2);
2682                                 NEXT;
2683                             } else {
2684                                 c2 = X0201;
2685                                 c1 &= 0x7f;
2686                                 SEND;
2687                             }
2688                         } else  { /* bogus code, skip SSO and one byte */
2689                             NEXT;
2690                         }
2691                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2692                                (c1 == 0xFD || c1 == 0xFE)) {
2693                         /* CP10001 */
2694                         c2 = X0201;
2695                         c1 &= 0x7f;
2696                         SEND;
2697                     } else {
2698                        /* already established */
2699                        c2 = c1;
2700                        NEXT;
2701                     }
2702                 }
2703             } else if ((c1 > SP) && (c1 != DEL)) {
2704                 /* in case of Roman characters */
2705                 if (shift_mode) {
2706                     /* output 1 shifted byte */
2707                     if (iso8859_f) {
2708                         c2 = ISO8859_1;
2709                         SEND;
2710                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2711                       /* output 1 shifted byte */
2712                         if (iso2022jp_f && !x0201_f) {
2713                             (*oconv)(GETA1, GETA2);
2714                             NEXT;
2715                         } else {
2716                             c2 = X0201;
2717                             SEND;
2718                         }
2719                     } else {
2720                         /* look like bogus code */
2721                         NEXT;
2722                     }
2723                 } else if (input_mode == X0208 || input_mode == X0212 ||
2724                            input_mode == X0213_1 || input_mode == X0213_2) {
2725                     /* in case of Kanji shifted */
2726                     c2 = c1;
2727                     NEXT;
2728                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2729                     /* Check MIME code */
2730                     if ((c1 = (*i_getc)(f)) == EOF) {
2731                         (*oconv)(0, '=');
2732                         LAST;
2733                     } else if (c1 == '?') {
2734                         /* =? is mime conversion start sequence */
2735                         if(mime_f == STRICT_MIME) {
2736                             /* check in real detail */
2737                             if (mime_begin_strict(f) == EOF)
2738                                 LAST;
2739                             else
2740                                 NEXT;
2741                         } else if (mime_begin(f) == EOF)
2742                             LAST;
2743                         else
2744                             NEXT;
2745                     } else {
2746                         (*oconv)(0, '=');
2747                         (*i_ungetc)(c1,f);
2748                         NEXT;
2749                     }
2750                 } else {
2751                     /* normal ASCII code */
2752                     SEND;
2753                 }
2754             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2755                 shift_mode = FALSE;
2756                 NEXT;
2757             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2758                 shift_mode = TRUE;
2759                 NEXT;
2760             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                /* Escape sequence: the byte(s) after ESC select input_mode;
                   unrecognized sequences are echoed through (*oconv). */
2761                 if ((c1 = (*i_getc)(f)) == EOF) {
2762                     /*  (*oconv)(0, ESC); don't send bogus code */
2763                     LAST;
2764                 } else if (c1 == '$') {
2765                     if ((c1 = (*i_getc)(f)) == EOF) {
2766                         /*
2767                         (*oconv)(0, ESC); don't send bogus code
2768                         (*oconv)(0, '$'); */
2769                         LAST;
2770                     } else if (c1 == '@'|| c1 == 'B') {
2771                         /* This is kanji introduction */
2772                         input_mode = X0208;
2773                         shift_mode = FALSE;
2774                         set_input_codename("ISO-2022-JP");
2775 #ifdef CHECK_OPTION
2776                         debug("ISO-2022-JP");
2777 #endif
2778                         NEXT;
2779                     } else if (c1 == '(') {
2780                         if ((c1 = (*i_getc)(f)) == EOF) {
2781                             /* don't send bogus code
2782                             (*oconv)(0, ESC);
2783                             (*oconv)(0, '$');
2784                             (*oconv)(0, '(');
2785                                 */
2786                             LAST;
2787                         } else if (c1 == '@'|| c1 == 'B') {
2788                             /* This is kanji introduction */
2789                             input_mode = X0208;
2790                             shift_mode = FALSE;
2791                             NEXT;
2792 #ifdef X0212_ENABLE
2793                         } else if (c1 == 'D'){
2794                             input_mode = X0212;
2795                             shift_mode = FALSE;
2796                             NEXT;
2797 #endif /* X0212_ENABLE */
2798                         } else if (c1 == (X0213_1&0x7F)){
2799                             input_mode = X0213_1;
2800                             shift_mode = FALSE;
2801                             NEXT;
2802                         } else if (c1 == (X0213_2&0x7F)){
2803                             input_mode = X0213_2;
2804                             shift_mode = FALSE;
2805                             NEXT;
2806                         } else {
2807                             /* could be some special code */
2808                             (*oconv)(0, ESC);
2809                             (*oconv)(0, '$');
2810                             (*oconv)(0, '(');
2811                             (*oconv)(0, c1);
2812                             NEXT;
2813                         }
2814                     } else if (broken_f&0x2) {
2815                         /* accept any ESC-(-x as broken code ... */
2816                         input_mode = X0208;
2817                         shift_mode = FALSE;
2818                         NEXT;
2819                     } else {
2820                         (*oconv)(0, ESC);
2821                         (*oconv)(0, '$');
2822                         (*oconv)(0, c1);
2823                         NEXT;
2824                     }
2825                 } else if (c1 == '(') {
2826                     if ((c1 = (*i_getc)(f)) == EOF) {
2827                         /* don't send bogus code
2828                         (*oconv)(0, ESC);
2829                         (*oconv)(0, '('); */
2830                         LAST;
2831                     } else {
2832                         if (c1 == 'I') {
2833                             /* This is X0201 kana introduction */
2834                             input_mode = X0201; shift_mode = X0201;
2835                             NEXT;
2836                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2837                             /* ESC ( B / J / H: back to single-byte input (input_mode = ASCII) */
2838                             input_mode = ASCII; shift_mode = FALSE;
2839                             NEXT;
2840                         } else if (broken_f&0x2) {
2841                             input_mode = ASCII; shift_mode = FALSE;
2842                             NEXT;
2843                         } else {
2844                             (*oconv)(0, ESC);
2845                             (*oconv)(0, '(');
2846                             /* maintain various input_mode here */
2847                             SEND;
2848                         }
2849                     }
2850                } else if ( c1 == 'N' || c1 == 'n'){
2851                    /* SS2 */
2852                    c3 = (*i_getc)(f);  /* skip SS2 */
2853                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2854                        c1 = c3;
2855                        c2 = X0201;
2856                        SEND;
2857                    }else{
2858                        (*i_ungetc)(c3, f);
2859                        /* lonely ESC  */
2860                        (*oconv)(0, ESC);
2861                        SEND;
2862                    }
2863                 } else {
2864                     /* lonely ESC  */
2865                     (*oconv)(0, ESC);
2866                     SEND;
2867                 }
2868             } else if (c1 == ESC && iconv == s_iconv) {
2869                 /* ESC in Shift_JIS */
2870                 if ((c1 = (*i_getc)(f)) == EOF) {
2871                     /*  (*oconv)(0, ESC); don't send bogus code */
2872                     LAST;
2873                 } else if (c1 == '$') {
2874                     /* J-PHONE emoji */
2875                     if ((c1 = (*i_getc)(f)) == EOF) {
2876                         /*
2877                            (*oconv)(0, ESC); don't send bogus code
2878                            (*oconv)(0, '$'); */
2879                         LAST;
2880                     } else {
2881                         if (('E' <= c1 && c1 <= 'G') ||
2882                             ('O' <= c1 && c1 <= 'Q')) {
2883                             /*
2884                                NUM : 0 1 2 3 4 5
2885                                BYTE: G E F O P Q
2886                                C%7 : 1 6 0 2 3 4
2887                                C%7 : 0 1 2 3 4 5 6
2888                                NUM : 2 0 3 4 5 X 1
2889                              */
2890                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2891                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2892                             while ((c1 = (*i_getc)(f)) != EOF) {
2893                                 if (SP <= c1 && c1 <= 'z') {
2894                                     (*oconv)(0, c1 + c0);
2895                                 } else break; /* c1 == SO */
2896                             }
2897                         }
2898                     }
2899                     if (c1 == EOF) LAST;
2900                     NEXT;
2901                 } else {
2902                     /* lonely ESC  */
2903                     (*oconv)(0, ESC);
2904                     SEND;
2905                 }
2906             } else if (c1 == LF || c1 == CR) {
2907                 if (broken_f&4) {
2908                     input_mode = ASCII; set_iconv(FALSE, 0);
2909                     SEND;
2910                 } else if (mime_decode_f && !mime_decode_mode){
                    /* A line break followed by SP is dropped: only the SP is
                       pushed back and the loop restarts. */
2911                     if (c1 == LF) {
2912                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2913                             i_ungetc(SP,f);
2914                             continue;
2915                         } else {
2916                             i_ungetc(c1,f);
2917                         }
2918                         c1 = LF;
2919                         SEND;
2920                     } else  { /* if (c1 == CR)*/
2921                         if ((c1=(*i_getc)(f))!=EOF) {
2922                             if (c1==SP) {
2923                                 i_ungetc(SP,f);
2924                                 continue;
2925                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2926                                 i_ungetc(SP,f);
2927                                 continue;
2928                             } else {
2929                                 i_ungetc(c1,f);
2930                             }
                            /* NOTE(review): LF is pushed back on every path
                               reaching this line, including when the byte
                               read after CR was not LF - confirm intended. */
2931                             i_ungetc(LF,f);
2932                         } else {
2933                             i_ungetc(c1,f);
2934                         }
2935                         c1 = CR;
2936                         SEND;
2937                     }
2938                 }
2939             } else if (c1 == DEL && input_mode == X0208) {
2940                 /* CP5022x */
2941                 c2 = c1;
2942                 NEXT;
2943             } else
2944                 SEND;
2945         }
2946         /* send: */
        /* Dispatch on input_mode.  In ASCII mode (*iconv) decides; a return
           of -1 asks for one more byte and -2 for two more (packed into c0
           as (first << 8) | second), after which (*iconv) is called again. */
2947         switch(input_mode){
2948         case ASCII:
2949             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2950             case -2:
2951                 /* 4 bytes UTF-8 */
2952                 if ((c0 = (*i_getc)(f)) != EOF) {
2953                     code_status(c0);
2954                     c0 <<= 8;
2955                     if ((c3 = (*i_getc)(f)) != EOF) {
2956                         code_status(c3);
2957                         (*iconv)(c2, c1, c0|c3);
2958                     }
2959                 }
2960                 break;
2961             case -1:
2962                 /* 3 bytes EUC or UTF-8 */
2963                 if ((c0 = (*i_getc)(f)) != EOF) {
2964                     code_status(c0);
2965                     (*iconv)(c2, c1, c0);
2966                 }
2967                 break;
2968             }
2969             break;
2970         case X0208:
2971         case X0213_1:
2972             if (ms_ucs_map_f &&
2973                 0x7F <= c2 && c2 <= 0x92 &&
2974                 0x21 <= c1 && c1 <= 0x7E) {
2975                 /* CP932 UDC */
                /* NOTE(review): c1 is already limited to 0x21..0x7E by the
                   enclosing condition, so this test cannot be true. */
2976                 if(c1 == 0x7F) return 0;
2977                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2978                 c2 = 0;
2979             }
2980             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2981             break;
2982 #ifdef X0212_ENABLE
2983         case X0212:
2984             (*oconv)(PREFIX_EUCG3 | c2, c1);
2985             break;
2986 #endif /* X0212_ENABLE */
2987         case X0213_2:
2988             (*oconv)(PREFIX_EUCG3 | c2, c1);
2989             break;
2990         default:
2991             (*oconv)(input_mode, c1);  /* other special case */
2992         }
2993
2994         c2 = 0;
2995         c0 = 0;
2996         continue;
2997         /* goto next_word */
2998     }
2999
3000     /* epilogue: signal end of input to the converter, then, if no input
        code name was recorded and an 8-bit input encoding was selected,
        record the candidate with the lowest score */
3001     (*iconv)(EOF, 0, 0);
3002     if (!input_codename)
3003     {
3004         if (is_8bit) {
3005             struct input_code *p = input_code_list;
3006             struct input_code *result = p;
3007             while (p->name){
3008                 if (p->score < result->score) result = p;
3009                 ++p;
3010             }
3011             set_input_codename(result->name);
3012 #ifdef CHECK_OPTION
3013             debug(result->name);
3014 #endif
3015         }
3016     }
3017     return 1;
3018 }
3019
3020 nkf_char
3021 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3022 {
3023     nkf_char ret, c3, c0;
3024     int hold_index;
3025
3026
3027     /** it must NOT be in the kanji shifte sequence      */
3028     /** it must NOT be written in JIS7                   */
3029     /** and it must be after 2 byte 8bit code            */
3030
3031     hold_count = 0;
3032     push_hold_buf(c2);
3033     push_hold_buf(c1);
3034
3035     while ((c1 = (*i_getc)(f)) != EOF) {
3036         if (c1 == ESC){
3037             (*i_ungetc)(c1,f);
3038             break;
3039         }
3040         code_status(c1);
3041         if (push_hold_buf(c1) == EOF || estab_f){
3042             break;
3043         }
3044     }
3045
3046     if (!estab_f){
3047         struct input_code *p = input_code_list;
3048         struct input_code *result = p;
3049         if (c1 == EOF){
3050             code_status(c1);
3051         }
3052         while (p->name){
3053             if (p->status_func && p->score < result->score){
3054                 result = p;
3055             }
3056             ++p;
3057         }
3058         set_iconv(TRUE, result->iconv_func);
3059     }
3060
3061
3062     /** now,
3063      ** 1) EOF is detected, or
3064      ** 2) Code is established, or
3065      ** 3) Buffer is FULL (but last word is pushed)
3066      **
3067      ** in 1) and 3) cases, we continue to use
3068      ** Kanji codes by oconv and leave estab_f unchanged.
3069      **/
3070
3071     ret = c1;
3072     hold_index = 0;
3073     while (hold_index < hold_count){
3074         c2 = hold_buf[hold_index++];
3075         if (c2 <= DEL
3076 #ifdef NUMCHAR_OPTION
3077             || is_unicode_capsule(c2)
3078 #endif
3079             ){
3080             (*iconv)(0, c2, 0);
3081             continue;
3082         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3083             (*iconv)(X0201, c2, 0);
3084             continue;
3085         }
3086         if (hold_index < hold_count){
3087             c1 = hold_buf[hold_index++];
3088         }else{
3089             c1 = (*i_getc)(f);
3090             if (c1 == EOF){
3091                 c3 = EOF;
3092                 break;
3093             }
3094             code_status(c1);
3095         }
3096         c0 = 0;
3097         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3098         case -2:
3099             /* 4 bytes UTF-8 */
3100             if (hold_index < hold_count){
3101                 c0 = hold_buf[hold_index++];
3102             } else if ((c0 = (*i_getc)(f)) == EOF) {
3103                 ret = EOF;
3104                 break;
3105             } else {
3106                 code_status(c0);
3107                 c0 <<= 8;
3108                 if (hold_index < hold_count){
3109                     c3 = hold_buf[hold_index++];
3110                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3111                     c0 = ret = EOF;
3112                     break;
3113                 } else {
3114                     code_status(c3);
3115                     (*iconv)(c2, c1, c0|c3);
3116                 }
3117             }
3118             break;
3119         case -1:
3120             /* 3 bytes EUC or UTF-8 */
3121             if (hold_index < hold_count){
3122                 c0 = hold_buf[hold_index++];
3123             } else if ((c0 = (*i_getc)(f)) == EOF) {
3124                 ret = EOF;
3125                 break;
3126             } else {
3127                 code_status(c0);
3128             }
3129             (*iconv)(c2, c1, c0);
3130             break;
3131         }
3132         if (c0 == EOF) break;
3133     }
3134     return ret;
3135 }
3136
3137 nkf_char push_hold_buf(nkf_char c2)
3138 {
3139     if (hold_count >= HOLD_SIZE*2)
3140         return (EOF);
3141     hold_buf[hold_count++] = (unsigned char)c2;
3142     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3143 }
3144
3145 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3146 {
3147 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3148     nkf_char val;
3149 #endif
3150     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3151 #ifdef SHIFTJIS_CP932
3152     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3153         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3154         if (val){
3155             c2 = val >> 8;
3156             c1 = val & 0xff;
3157         }
3158     }
3159     if (cp932inv_f
3160         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3161         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3162         if (c){
3163             c2 = c >> 8;
3164             c1 = c & 0xff;
3165         }
3166     }
3167 #endif /* SHIFTJIS_CP932 */
3168 #ifdef X0212_ENABLE
3169     if (!x0213_f && is_ibmext_in_sjis(c2)){
3170         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3171         if (val){
3172             if (val > 0x7FFF){
3173                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3174                 c1 = val & 0xff;
3175             }else{
3176                 c2 = val >> 8;
3177                 c1 = val & 0xff;
3178             }
3179             if (p2) *p2 = c2;
3180             if (p1) *p1 = c1;
3181             return 0;
3182         }
3183     }
3184 #endif
3185     if(c2 >= 0x80){
3186         if(x0213_f && c2 >= 0xF0){
3187             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3188                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3189             }else{ /* 78<=k<=94 */
3190                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3191                 if (0x9E < c1) c2++;
3192             }
3193         }else{
3194             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3195             if (0x9E < c1) c2++;
3196         }
3197         if (c1 < 0x9F)
3198             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3199         else {
3200             c1 = c1 - 0x7E;
3201         }
3202     }
3203
3204 #ifdef X0212_ENABLE
3205     c2 = x0212_unshift(c2);
3206 #endif
3207     if (p2) *p2 = c2;
3208     if (p1) *p1 = c1;
3209     return 0;
3210 }
3211
3212 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3213 {
3214     if (c2 == X0201) {
3215         c1 &= 0x7f;
3216     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3217         /* NOP */
3218     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3219         /* CP932 UDC */
3220         if(c1 == 0x7F) return 0;
3221         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3222         c2 = 0;
3223     } else {
3224         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3225         if (ret) return ret;
3226     }
3227     (*oconv)(c2, c1);
3228     return 0;
3229 }
3230
3231 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3232 {
3233     if (c2 == X0201) {
3234         c1 &= 0x7f;
3235 #ifdef X0212_ENABLE
3236     }else if (c2 == 0x8f){
3237         if (c0 == 0){
3238             return -1;
3239         }
3240         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3241             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3242             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3243             c2 = 0;
3244         } else {
3245             c2 = (c2 << 8) | (c1 & 0x7f);
3246             c1 = c0 & 0x7f;
3247 #ifdef SHIFTJIS_CP932
3248             if (cp51932_f){
3249                 nkf_char s2, s1;
3250                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3251                     s2e_conv(s2, s1, &c2, &c1);
3252                     if (c2 < 0x100){
3253                         c1 &= 0x7f;
3254                         c2 &= 0x7f;
3255                     }
3256                 }
3257             }
3258 #endif /* SHIFTJIS_CP932 */
3259         }
3260 #endif /* X0212_ENABLE */
3261     } else if (c2 == SSO){
3262         c2 = X0201;
3263         c1 &= 0x7f;
3264     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3265         /* NOP */
3266     } else {
3267         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3268             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3269             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3270             c2 = 0;
3271         } else {
3272             c1 &= 0x7f;
3273             c2 &= 0x7f;
3274 #ifdef SHIFTJIS_CP932
3275             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3276                 nkf_char s2, s1;
3277                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3278                     s2e_conv(s2, s1, &c2, &c1);
3279                     if (c2 < 0x100){
3280                         c1 &= 0x7f;
3281                         c2 &= 0x7f;
3282                     }
3283                 }
3284             }
3285 #endif /* SHIFTJIS_CP932 */
3286         }
3287     }
3288     (*oconv)(c2, c1);
3289     return 0;
3290 }
3291
3292 #ifdef UTF8_INPUT_ENABLE
3293 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3294 {
3295     nkf_char ret = 0;
3296
3297     if (!c1){
3298         *p2 = 0;
3299         *p1 = c2;
3300     }else if (0xc0 <= c2 && c2 <= 0xef) {
3301         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3302 #ifdef NUMCHAR_OPTION
3303         if (ret > 0){
3304             if (p2) *p2 = 0;
3305             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3306             ret = 0;
3307         }
3308 #endif
3309     }
3310     return ret;
3311 }
3312
3313 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3314 {
3315     nkf_char ret = 0;
3316     static const char w_iconv_utf8_1st_byte[] =
3317     { /* 0xC0 - 0xFF */
3318         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3319         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3320         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3321         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3322
3323     if (c2 < 0 || 0xff < c2) {
3324     }else if (c2 == 0) { /* 0 : 1 byte*/
3325         c0 = 0;
3326     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3327         return 0;
3328     } else{
3329         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3330         case 21:
3331             if (c1 < 0x80 || 0xBF < c1) return 0;
3332             break;
3333         case 30:
3334             if (c0 == 0) return -1;
3335             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3336                 return 0;
3337             break;
3338         case 31:
3339         case 33:
3340             if (c0 == 0) return -1;
3341             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3342                 return 0;
3343             break;
3344         case 32:
3345             if (c0 == 0) return -1;
3346             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3347                 return 0;
3348             break;
3349         case 40:
3350             if (c0 == 0) return -2;
3351             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3352                 return 0;
3353             break;
3354         case 41:
3355             if (c0 == 0) return -2;
3356             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3357                 return 0;
3358             break;
3359         case 42:
3360             if (c0 == 0) return -2;
3361             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3362                 return 0;
3363             break;
3364         default:
3365             return 0;
3366             break;
3367         }
3368     }
3369     if (c2 == 0 || c2 == EOF){
3370     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3371         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3372         c2 = 0;
3373     } else {
3374         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3375     }
3376     if (ret == 0){
3377         (*oconv)(c2, c1);
3378     }
3379     return ret;
3380 }
3381 #endif
3382
3383 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3384 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3385 {
3386     val &= VALUE_MASK;
3387     if (val < 0x80){
3388         *p2 = val;
3389         *p1 = 0;
3390         *p0 = 0;
3391     }else if (val < 0x800){
3392         *p2 = 0xc0 | (val >> 6);
3393         *p1 = 0x80 | (val & 0x3f);
3394         *p0 = 0;
3395     } else if (val <= NKF_INT32_C(0xFFFF)) {
3396         *p2 = 0xe0 | (val >> 12);
3397         *p1 = 0x80 | ((val >> 6) & 0x3f);
3398         *p0 = 0x80 | (val        & 0x3f);
3399     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3400         *p2 = 0xe0 |  (val >> 16);
3401         *p1 = 0x80 | ((val >> 12) & 0x3f);
3402         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3403     } else {
3404         *p2 = 0;
3405         *p1 = 0;
3406         *p0 = 0;
3407     }
3408 }
3409 #endif
3410
3411 #ifdef UTF8_INPUT_ENABLE
3412 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3413 {
3414     nkf_char val;
3415     if (c2 >= 0xf8) {
3416         val = -1;
3417     } else if (c2 >= 0xf0){
3418         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3419         val = (c2 & 0x0f) << 18;
3420         val |= (c1 & 0x3f) << 12;
3421         val |= (c0 & 0x3f00) >> 2;
3422         val |= (c0 & 0x3f);
3423     }else if (c2 >= 0xe0){
3424         val = (c2 & 0x0f) << 12;
3425         val |= (c1 & 0x3f) << 6;
3426         val |= (c0 & 0x3f);
3427     }else if (c2 >= 0xc0){
3428         val = (c2 & 0x1f) << 6;
3429         val |= (c1 & 0x3f);
3430     }else{
3431         val = c2;
3432     }
3433     return val;
3434 }
3435
3436 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3437 {
3438     nkf_char c2, c1, c0;
3439     nkf_char ret = 0;
3440     val &= VALUE_MASK;
3441     if (val < 0x80){
3442         *p2 = 0;
3443         *p1 = val;
3444     }else{
3445         w16w_conv(val, &c2, &c1, &c0);
3446         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3447 #ifdef NUMCHAR_OPTION
3448         if (ret > 0){
3449             *p2 = 0;
3450             *p1 = CLASS_UNICODE | val;
3451             ret = 0;
3452         }
3453 #endif
3454     }
3455     return ret;
3456 }
3457 #endif
3458
3459 #ifdef UTF8_INPUT_ENABLE
3460 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3461 {
3462     nkf_char ret = 0;
3463     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3464         (*oconv)(c2, c1);
3465         return 0;
3466     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3467         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3468             return -2;
3469         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3470         c2 = 0;
3471     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3472         /*
3473            return 2;
3474         */
3475         return 1;
3476     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3477     if (ret) return ret;
3478     (*oconv)(c2, c1);
3479     return 0;
3480 }
3481
3482 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3483 {
3484     int ret = 0;
3485
3486     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3487     } else if (is_unicode_bmp(c1)) {
3488         ret = w16e_conv(c1, &c2, &c1);
3489     } else {
3490         c2 = 0;
3491         c1 =  CLASS_UNICODE | c1;
3492     }
3493     if (ret) return ret;
3494     (*oconv)(c2, c1);
3495     return 0;
3496 }
3497
3498 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3499 {
3500     const unsigned short *const *pp;
3501     const unsigned short *const *const *ppp;
3502     static const char no_best_fit_chars_table_C2[] =
3503     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3504         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3505         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3506         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3507     static const char no_best_fit_chars_table_C2_ms[] =
3508     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3509         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3510         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3511         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3512     static const char no_best_fit_chars_table_932_C2[] =
3513     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3514         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3515         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3516         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3517     static const char no_best_fit_chars_table_932_C3[] =
3518     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3519         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3520         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3521         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3522     nkf_char ret = 0;
3523
3524     if(c2 < 0x80){
3525         *p2 = 0;
3526         *p1 = c2;
3527     }else if(c2 < 0xe0){
3528         if(no_best_fit_chars_f){
3529             if(ms_ucs_map_f == UCS_MAP_CP932){
3530                 switch(c2){
3531                 case 0xC2:
3532                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3533                     break;
3534                 case 0xC3:
3535                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3536                     break;
3537                 }
3538             }else if(!cp932inv_f){
3539                 switch(c2){
3540                 case 0xC2:
3541                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3542                     break;
3543                 case 0xC3:
3544                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3545                     break;
3546                 }
3547             }else if(ms_ucs_map_f == UCS_MAP_MS){
3548                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3549             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3550                 switch(c2){
3551                 case 0xC2:
3552                     switch(c1){
3553                     case 0xA2:
3554                     case 0xA3:
3555                     case 0xA5:
3556                     case 0xA6:
3557                     case 0xAC:
3558                     case 0xAF:
3559                     case 0xB8:
3560                         return 1;
3561                     }
3562                     break;
3563                 }
3564             }
3565         }
3566         pp =
3567             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3568             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3569             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3570             utf8_to_euc_2bytes;
3571         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3572     }else if(c0 < 0xF0){
3573         if(no_best_fit_chars_f){
3574             if(ms_ucs_map_f == UCS_MAP_CP932){
3575                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3576             }else if(ms_ucs_map_f == UCS_MAP_MS){
3577                 switch(c2){
3578                 case 0xE2:
3579                     switch(c1){
3580                     case 0x80:
3581                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3582                         break;
3583                     case 0x88:
3584                         if(c0 == 0x92) return 1;
3585                         break;
3586                     }
3587                     break;
3588                 case 0xE3:
3589                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3590                     break;
3591                 }
3592             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3593                 switch(c2){
3594                 case 0xE3:
3595                     switch(c1){
3596                     case 0x82:
3597                             if(c0 == 0x94) return 1;
3598                         break;
3599                     case 0x83:
3600                             if(c0 == 0xBB) return 1;
3601                         break;
3602                     }
3603                     break;
3604                 }
3605             }else{
3606                 switch(c2){
3607                 case 0xE2:
3608                     switch(c1){
3609                     case 0x80:
3610                         if(c0 == 0x95) return 1;
3611                         break;
3612                     case 0x88:
3613                         if(c0 == 0xA5) return 1;
3614                         break;
3615                     }
3616                     break;
3617                 case 0xEF:
3618                     switch(c1){
3619                     case 0xBC:
3620                         if(c0 == 0x8D) return 1;
3621                         break;
3622                     case 0xBD:
3623                         if(c0 == 0x9E && !cp932inv_f) return 1;
3624                         break;
3625                     case 0xBF:
3626                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3627                         break;
3628                     }
3629                     break;
3630                 }
3631             }
3632         }
3633         ppp =
3634             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3635             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3636             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3637             utf8_to_euc_3bytes;
3638         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3639     }else return -1;
3640 #ifdef SHIFTJIS_CP932
3641     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3642         nkf_char s2, s1;
3643         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3644             s2e_conv(s2, s1, p2, p1);
3645         }else{
3646             ret = 1;
3647         }
3648     }
3649 #endif
3650     return ret;
3651 }
3652
3653 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3654 {
3655     nkf_char c2;
3656     const unsigned short *p;
3657     unsigned short val;
3658
3659     if (pp == 0) return 1;
3660
3661     c1 -= 0x80;
3662     if (c1 < 0 || psize <= c1) return 1;
3663     p = pp[c1];
3664     if (p == 0)  return 1;
3665
3666     c0 -= 0x80;
3667     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3668     val = p[c0];
3669     if (val == 0) return 1;
3670     if (no_cp932ext_f && (
3671         (val>>8) == 0x2D || /* NEC special characters */
3672         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3673         )) return 1;
3674
3675     c2 = val >> 8;
3676    if (val > 0x7FFF){
3677         c2 &= 0x7f;
3678         c2 |= PREFIX_EUCG3;
3679     }
3680     if (c2 == SO) c2 = X0201;
3681     c1 = val & 0x7f;
3682     if (p2) *p2 = c2;
3683     if (p1) *p1 = c1;
3684     return 0;
3685 }
3686
3687 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3688 {
3689     int shift = 20;
3690     c &= VALUE_MASK;
3691     while(shift >= 0){
3692         if(c >= 1<<shift){
3693             while(shift >= 0){
3694                 (*f)(0, bin2hex(c>>shift));
3695                 shift -= 4;
3696             }
3697         }else{
3698             shift -= 4;
3699         }
3700     }
3701     return;
3702 }
3703
3704 void encode_fallback_html(nkf_char c)
3705 {
3706     (*oconv)(0, '&');
3707     (*oconv)(0, '#');
3708     c &= VALUE_MASK;
3709     if(c >= NKF_INT32_C(1000000))
3710         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3711     if(c >= NKF_INT32_C(100000))
3712         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3713     if(c >= 10000)
3714         (*oconv)(0, 0x30+(c/10000  )%10);
3715     if(c >= 1000)
3716         (*oconv)(0, 0x30+(c/1000   )%10);
3717     if(c >= 100)
3718         (*oconv)(0, 0x30+(c/100    )%10);
3719     if(c >= 10)
3720         (*oconv)(0, 0x30+(c/10     )%10);
3721     if(c >= 0)
3722         (*oconv)(0, 0x30+ c         %10);
3723     (*oconv)(0, ';');
3724     return;
3725 }
3726
3727 void encode_fallback_xml(nkf_char c)
3728 {
3729     (*oconv)(0, '&');
3730     (*oconv)(0, '#');
3731     (*oconv)(0, 'x');
3732     nkf_each_char_to_hex(oconv, c);
3733     (*oconv)(0, ';');
3734     return;
3735 }
3736
3737 void encode_fallback_java(nkf_char c)
3738 {
3739     (*oconv)(0, '\\');
3740     c &= VALUE_MASK;
3741     if(!is_unicode_bmp(c)){
3742         (*oconv)(0, 'U');
3743         (*oconv)(0, '0');
3744         (*oconv)(0, '0');
3745         (*oconv)(0, bin2hex(c>>20));
3746         (*oconv)(0, bin2hex(c>>16));
3747     }else{
3748         (*oconv)(0, 'u');
3749     }
3750     (*oconv)(0, bin2hex(c>>12));
3751     (*oconv)(0, bin2hex(c>> 8));
3752     (*oconv)(0, bin2hex(c>> 4));
3753     (*oconv)(0, bin2hex(c    ));
3754     return;
3755 }
3756
3757 void encode_fallback_perl(nkf_char c)
3758 {
3759     (*oconv)(0, '\\');
3760     (*oconv)(0, 'x');
3761     (*oconv)(0, '{');
3762     nkf_each_char_to_hex(oconv, c);
3763     (*oconv)(0, '}');
3764     return;
3765 }
3766
3767 void encode_fallback_subchar(nkf_char c)
3768 {
3769     c = unicode_subchar;
3770     (*oconv)((c>>8)&0xFF, c&0xFF);
3771     return;
3772 }
3773 #endif
3774
3775 #ifdef UTF8_OUTPUT_ENABLE
3776 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3777 {
3778     const unsigned short *p;
3779
3780     if (c2 == X0201) {
3781         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3782             switch (c1) {
3783             case 0x20:
3784                 return 0xA0;
3785             case 0x7D:
3786                 return 0xA9;
3787             }
3788         }
3789         p = euc_to_utf8_1byte;
3790 #ifdef X0212_ENABLE
3791     } else if (is_eucg3(c2)){
3792         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3793             return 0xA6;
3794         }
3795         c2 = (c2&0x7f) - 0x21;
3796         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3797             p = x0212_to_utf8_2bytes[c2];
3798         else
3799             return 0;
3800 #endif
3801     } else {
3802         c2 &= 0x7f;
3803         c2 = (c2&0x7f) - 0x21;
3804         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3805             p =
3806                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3807                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3808                 euc_to_utf8_2bytes_ms[c2];
3809         else
3810             return 0;
3811     }
3812     if (!p) return 0;
3813     c1 = (c1 & 0x7f) - 0x21;
3814     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3815         return p[c1];
3816     return 0;
3817 }
3818
3819 void w_oconv(nkf_char c2, nkf_char c1)
3820 {
3821     nkf_char c0;
3822     nkf_char val;
3823
3824     if (output_bom_f) {
3825         output_bom_f = FALSE;
3826         (*o_putc)('\357');
3827         (*o_putc)('\273');
3828         (*o_putc)('\277');
3829     }
3830
3831     if (c2 == EOF) {
3832         (*o_putc)(EOF);
3833         return;
3834     }
3835
3836 #ifdef NUMCHAR_OPTION
3837     if (c2 == 0 && is_unicode_capsule(c1)){
3838         val = c1 & VALUE_MASK;
3839         if (val < 0x80){
3840             (*o_putc)(val);
3841         }else if (val < 0x800){
3842             (*o_putc)(0xC0 | (val >> 6));
3843             (*o_putc)(0x80 | (val & 0x3f));
3844         } else if (val <= NKF_INT32_C(0xFFFF)) {
3845             (*o_putc)(0xE0 | (val >> 12));
3846             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3847             (*o_putc)(0x80 | (val        & 0x3f));
3848         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3849             (*o_putc)(0xF0 | ( val>>18));
3850             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3851             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3852             (*o_putc)(0x80 | ( val      & 0x3f));
3853         }
3854         return;
3855     }
3856 #endif
3857
3858     if (c2 == 0) {
3859         output_mode = ASCII;
3860         (*o_putc)(c1);
3861     } else if (c2 == ISO8859_1) {
3862         output_mode = UTF8;
3863         (*o_putc)(c1 | 0x080);
3864     } else {
3865         output_mode = UTF8;
3866         val = e2w_conv(c2, c1);
3867         if (val){
3868             w16w_conv(val, &c2, &c1, &c0);
3869             (*o_putc)(c2);
3870             if (c1){
3871                 (*o_putc)(c1);
3872                 if (c0) (*o_putc)(c0);
3873             }
3874         }
3875     }
3876 }
3877
3878 void w_oconv16(nkf_char c2, nkf_char c1)
3879 {
3880     if (output_bom_f) {
3881         output_bom_f = FALSE;
3882         if (output_endian == ENDIAN_LITTLE){
3883             (*o_putc)((unsigned char)'\377');
3884             (*o_putc)('\376');
3885         }else{
3886             (*o_putc)('\376');
3887             (*o_putc)((unsigned char)'\377');
3888         }
3889     }
3890
3891     if (c2 == EOF) {
3892         (*o_putc)(EOF);
3893         return;
3894     }
3895
3896     if (c2 == ISO8859_1) {
3897         c2 = 0;
3898         c1 |= 0x80;
3899 #ifdef NUMCHAR_OPTION
3900     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3901         if (is_unicode_bmp(c1)) {
3902             c2 = (c1 >> 8) & 0xff;
3903             c1 &= 0xff;
3904         } else {
3905             c1 &= VALUE_MASK;
3906             if (c1 <= UNICODE_MAX) {
3907                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3908                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3909                 if (output_endian == ENDIAN_LITTLE){
3910                     (*o_putc)(c2 & 0xff);
3911                     (*o_putc)((c2 >> 8) & 0xff);
3912                     (*o_putc)(c1 & 0xff);
3913                     (*o_putc)((c1 >> 8) & 0xff);
3914                 }else{
3915                     (*o_putc)((c2 >> 8) & 0xff);
3916                     (*o_putc)(c2 & 0xff);
3917                     (*o_putc)((c1 >> 8) & 0xff);
3918                     (*o_putc)(c1 & 0xff);
3919                 }
3920             }
3921             return;
3922         }
3923 #endif
3924     } else if (c2) {
3925         nkf_char val = e2w_conv(c2, c1);
3926         c2 = (val >> 8) & 0xff;
3927         c1 = val & 0xff;
3928         if (!val) return;
3929     }
3930     if (output_endian == ENDIAN_LITTLE){
3931         (*o_putc)(c1);
3932         (*o_putc)(c2);
3933     }else{
3934         (*o_putc)(c2);
3935         (*o_putc)(c1);
3936     }
3937 }
3938
3939 void w_oconv32(nkf_char c2, nkf_char c1)
3940 {
3941     if (output_bom_f) {
3942         output_bom_f = FALSE;
3943         if (output_endian == ENDIAN_LITTLE){
3944             (*o_putc)((unsigned char)'\377');
3945             (*o_putc)('\376');
3946             (*o_putc)('\000');
3947             (*o_putc)('\000');
3948         }else{
3949             (*o_putc)('\000');
3950             (*o_putc)('\000');
3951             (*o_putc)('\376');
3952             (*o_putc)((unsigned char)'\377');
3953         }
3954     }
3955
3956     if (c2 == EOF) {
3957         (*o_putc)(EOF);
3958         return;
3959     }
3960
3961     if (c2 == ISO8859_1) {
3962         c1 |= 0x80;
3963 #ifdef NUMCHAR_OPTION
3964     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3965         c1 &= VALUE_MASK;
3966 #endif
3967     } else if (c2) {
3968         c1 = e2w_conv(c2, c1);
3969         if (!c1) return;
3970     }
3971     if (output_endian == ENDIAN_LITTLE){
3972         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3973         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3974         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3975         (*o_putc)('\000');
3976     }else{
3977         (*o_putc)('\000');
3978         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3979         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3980         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3981     }
3982 }
3983 #endif
3984
3985 void e_oconv(nkf_char c2, nkf_char c1)
3986 {
3987 #ifdef NUMCHAR_OPTION
3988     if (c2 == 0 && is_unicode_capsule(c1)){
3989         w16e_conv(c1, &c2, &c1);
3990         if (c2 == 0 && is_unicode_capsule(c1)){
3991             c2 = c1 & VALUE_MASK;
3992             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
3993                 /* eucJP-ms UDC */
3994                 c1 &= 0xFFF;
3995                 c2 = c1 / 94;
3996                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
3997                 c1 = 0x21 + c1 % 94;
3998                 if (is_eucg3(c2)){
3999                     (*o_putc)(0x8f);
4000                     (*o_putc)((c2 & 0x7f) | 0x080);
4001                     (*o_putc)(c1 | 0x080);
4002                 }else{
4003                     (*o_putc)((c2 & 0x7f) | 0x080);
4004                     (*o_putc)(c1 | 0x080);
4005                 }
4006                 return;
4007             } else {
4008                 if (encode_fallback) (*encode_fallback)(c1);
4009                 return;
4010             }
4011         }
4012     }
4013 #endif
4014     if (c2 == EOF) {
4015         (*o_putc)(EOF);
4016         return;
4017     } else if (c2 == 0) {
4018         output_mode = ASCII;
4019         (*o_putc)(c1);
4020     } else if (c2 == X0201) {
4021         output_mode = JAPANESE_EUC;
4022         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4023     } else if (c2 == ISO8859_1) {
4024         output_mode = ISO8859_1;
4025         (*o_putc)(c1 | 0x080);
4026 #ifdef X0212_ENABLE
4027     } else if (is_eucg3(c2)){
4028         output_mode = JAPANESE_EUC;
4029 #ifdef SHIFTJIS_CP932
4030         if (!cp932inv_f){
4031             nkf_char s2, s1;
4032             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4033                 s2e_conv(s2, s1, &c2, &c1);
4034             }
4035         }
4036 #endif
4037         if (c2 == 0) {
4038             output_mode = ASCII;
4039             (*o_putc)(c1);
4040         }else if (is_eucg3(c2)){
4041             if (x0212_f){
4042                 (*o_putc)(0x8f);
4043                 (*o_putc)((c2 & 0x7f) | 0x080);
4044                 (*o_putc)(c1 | 0x080);
4045             }
4046         }else{
4047             (*o_putc)((c2 & 0x7f) | 0x080);
4048             (*o_putc)(c1 | 0x080);
4049         }
4050 #endif
4051     } else {
4052         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4053             set_iconv(FALSE, 0);
4054             return; /* too late to rescue this char */
4055         }
4056         output_mode = JAPANESE_EUC;
4057         (*o_putc)(c2 | 0x080);
4058         (*o_putc)(c1 | 0x080);
4059     }
4060 }
4061
4062 #ifdef X0212_ENABLE
4063 nkf_char x0212_shift(nkf_char c)
4064 {
4065     nkf_char ret = c;
4066     c &= 0x7f;
4067     if (is_eucg3(ret)){
4068         if (0x75 <= c && c <= 0x7f){
4069             ret = c + (0x109 - 0x75);
4070         }
4071     }else{
4072         if (0x75 <= c && c <= 0x7f){
4073             ret = c + (0x113 - 0x75);
4074         }
4075     }
4076     return ret;
4077 }
4078
4079
4080 nkf_char x0212_unshift(nkf_char c)
4081 {
4082     nkf_char ret = c;
4083     if (0x7f <= c && c <= 0x88){
4084         ret = c + (0x75 - 0x7f);
4085     }else if (0x89 <= c && c <= 0x92){
4086         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4087     }
4088     return ret;
4089 }
4090 #endif /* X0212_ENABLE */
4091
4092 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4093 {
4094     nkf_char ndx;
4095     if (is_eucg3(c2)){
4096         ndx = c2 & 0x7f;
4097         if (x0213_f){
4098             if((0x21 <= ndx && ndx <= 0x2F)){
4099                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4100                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4101                 return 0;
4102             }else if(0x6E <= ndx && ndx <= 0x7E){
4103                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4104                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4105                 return 0;
4106             }
4107             return 1;
4108         }
4109 #ifdef X0212_ENABLE
4110         else if(nkf_isgraph(ndx)){
4111             nkf_char val = 0;
4112             const unsigned short *ptr;
4113             ptr = x0212_shiftjis[ndx - 0x21];
4114             if (ptr){
4115                 val = ptr[(c1 & 0x7f) - 0x21];
4116             }
4117             if (val){
4118                 c2 = val >> 8;
4119                 c1 = val & 0xff;
4120                 if (p2) *p2 = c2;
4121                 if (p1) *p1 = c1;
4122                 return 0;
4123             }
4124             c2 = x0212_shift(c2);
4125         }
4126 #endif /* X0212_ENABLE */
4127     }
4128     if(0x7F < c2) return 1;
4129     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4130     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4131     return 0;
4132 }
4133
4134 void s_oconv(nkf_char c2, nkf_char c1)
4135 {
4136 #ifdef NUMCHAR_OPTION
4137     if (c2 == 0 && is_unicode_capsule(c1)){
4138         w16e_conv(c1, &c2, &c1);
4139         if (c2 == 0 && is_unicode_capsule(c1)){
4140             c2 = c1 & VALUE_MASK;
4141             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4142                 /* CP932 UDC */
4143                 c1 &= 0xFFF;
4144                 c2 = c1 / 188 + 0xF0;
4145                 c1 = c1 % 188;
4146                 c1 += 0x40 + (c1 > 0x3e);
4147                 (*o_putc)(c2);
4148                 (*o_putc)(c1);
4149                 return;
4150             } else {
4151                 if(encode_fallback)(*encode_fallback)(c1);
4152                 return;
4153             }
4154         }
4155     }
4156 #endif
4157     if (c2 == EOF) {
4158         (*o_putc)(EOF);
4159         return;
4160     } else if (c2 == 0) {
4161         output_mode = ASCII;
4162         (*o_putc)(c1);
4163     } else if (c2 == X0201) {
4164         output_mode = SHIFT_JIS;
4165         (*o_putc)(c1|0x80);
4166     } else if (c2 == ISO8859_1) {
4167         output_mode = ISO8859_1;
4168         (*o_putc)(c1 | 0x080);
4169 #ifdef X0212_ENABLE
4170     } else if (is_eucg3(c2)){
4171         output_mode = SHIFT_JIS;
4172         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4173             (*o_putc)(c2);
4174             (*o_putc)(c1);
4175         }
4176 #endif
4177     } else {
4178         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4179             set_iconv(FALSE, 0);
4180             return; /* too late to rescue this char */
4181         }
4182         output_mode = SHIFT_JIS;
4183         e2s_conv(c2, c1, &c2, &c1);
4184
4185 #ifdef SHIFTJIS_CP932
4186         if (cp932inv_f
4187             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4188             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4189             if (c){
4190                 c2 = c >> 8;
4191                 c1 = c & 0xff;
4192             }
4193         }
4194 #endif /* SHIFTJIS_CP932 */
4195
4196         (*o_putc)(c2);
4197         if (prefix_table[(unsigned char)c1]){
4198             (*o_putc)(prefix_table[(unsigned char)c1]);
4199         }