OSDN Git Service

* s/nextline/newline/g.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.149 2007/11/18 12:05:18 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-11-18"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #ifndef MIME_DECODE_DEFAULT
44 #define MIME_DECODE_DEFAULT STRICT_MIME
45 #endif
46
47 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
48 #define MSDOS
49 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
50 #define __WIN32__
51 #endif
52 #endif
53
54 #ifdef PERL_XS
55 #undef OVERWRITE
56 #endif
57
58 #ifndef PERL_XS
59 #include <stdio.h>
60 #endif
61
62 #include <stdlib.h>
63 #include <string.h>
64
65 #if defined(MSDOS) || defined(__OS2__)
66 #include <fcntl.h>
67 #include <io.h>
68 #if defined(_MSC_VER) || defined(__WATCOMC__)
69 #define mktemp _mktemp
70 #endif
71 #endif
72
73 #ifdef MSDOS
74 #ifdef LSI_C
75 #define setbinmode(fp) fsetbin(fp)
76 #elif defined(__DJGPP__)
77 #include <libc/dosio.h>
78 #define setbinmode(fp) djgpp_setbinmode(fp)
79 #else /* Microsoft C, Turbo C */
80 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
81 #endif
82 #else /* UNIX */
83 #define setbinmode(fp)
84 #endif
85
86 #if defined(__DJGPP__)
87 void  djgpp_setbinmode(FILE *fp) /* DJGPP only: mark fp's file handle as binary */
88 {
89     /* We do not use libc's setmode(), which changes COOKED/RAW mode in device. */
90     int fd, m;
91     fd = fileno(fp); /* handle number behind the stdio stream */
92     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY; /* clear O_TEXT, set O_BINARY, keep the other mode bits */
93     __file_handle_set(fd, m); /* record the new mode for this handle */
94 }
95 #endif
96
97 #ifdef _IOFBF /* SysV and MSDOS, Windows */
98 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
99 #else /* BSD */
100 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
101 #endif
102
103 /*Borland C++ 4.5 EasyWin*/
104 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
105 #define         EASYWIN
106 #ifndef __WIN16__
107 #define __WIN16__
108 #endif
109 #include <windows.h>
110 #endif
111
112 #ifdef OVERWRITE
113 /* added by satoru@isoternet.org */
114 #if defined(__EMX__)
115 #include <sys/types.h>
116 #endif
117 #include <sys/stat.h>
118 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
119 #include <unistd.h>
120 #if defined(__WATCOMC__)
121 #include <sys/utime.h>
122 #else
123 #include <utime.h>
124 #endif
125 #else /* defined(MSDOS) */
126 #ifdef __WIN32__
127 #ifdef __BORLANDC__ /* BCC32 */
128 #include <utime.h>
129 #else /* !defined(__BORLANDC__) */
130 #include <sys/utime.h>
131 #endif /* (__BORLANDC__) */
132 #else /* !defined(__WIN32__) */
133 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
134 #include <sys/utime.h>
135 #elif defined(__TURBOC__) /* BCC */
136 #include <utime.h>
137 #elif defined(LSI_C) /* LSI C */
138 #endif /* (__WIN32__) */
139 #endif
140 #endif
141 #endif
142
143 #define         FALSE   0
144 #define         TRUE    1
145
146 /* state of output_mode and input_mode
147
148    c2           0 means ASCII
149                 X0201
150                 ISO8859_1
151                 X0208
152                 EOF      all termination
153    c1           32bit data
154
155  */
156
157 #define         ASCII           0
158 #define         X0208           1
159 #define         X0201           2
160 #define         ISO8859_1       8
161 #define         NO_X0201        3
162 #define         X0212      0x2844
163 #define         X0213_1    0x284F
164 #define         X0213_2    0x2850
165
166 /* Input Assumption */
167
168 #define         JIS_INPUT       4
169 #define         EUC_INPUT      16
170 #define         SJIS_INPUT      5
171 #define         LATIN1_INPUT    6
172 #define         FIXED_MIME      7
173 #define         STRICT_MIME     8
174
175 /* MIME ENCODE */
176
177 #define         ISO2022JP       9
178 #define         JAPANESE_EUC   10
179 #define         SHIFT_JIS      11
180
181 #define         UTF8           12
182 #define         UTF8_INPUT     13
183 #define         UTF16_INPUT    1015
184 #define         UTF32_INPUT    1017
185
186 /* byte order */
187
188 #define         ENDIAN_BIG      1234
189 #define         ENDIAN_LITTLE   4321
190 #define         ENDIAN_2143     2143
191 #define         ENDIAN_3412     3412
192
193 #define         WISH_TRUE      15
194
195 /* ASCII CODE */
196
197 #define         BS      0x08
198 #define         TAB     0x09
199 #define         LF      0x0a
200 #define         CR      0x0d
201 #define         ESC     0x1b
202 #define         SP      0x20
203 #define         AT      0x40
204 #define         SSP     0xa0
205 #define         DEL     0x7f
206 #define         SI      0x0f
207 #define         SO      0x0e
208 #define         SSO     0x8e
209 #define         SS3     0x8f
210 #define         CRLF    0x0D0A
211
212 #define         is_alnum(c)  \
213             (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
214
215 /* I don't trust the portability of toupper */
216 #define nkf_toupper(c)  (('a'<=c && c<='z')?(c-('a'-'A')):c)
217 #define nkf_isoctal(c)  ('0'<=c && c<='7')
218 #define nkf_isdigit(c)  ('0'<=c && c<='9')
219 #define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
220 #define nkf_isblank(c) (c == SP || c == TAB)
221 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF)
222 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
223 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
224 #define nkf_isprint(c) (SP<=c && c<='~')
225 #define nkf_isgraph(c) ('!'<=c && c<='~')
226 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
227                     ('A'<=c&&c<='F') ? (c-'A'+10) : \
228                     ('a'<=c&&c<='f') ? (c-'a'+10) : 0)
229 #define bin2hex(c) ("0123456789ABCDEF"[c&15])
230 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
231 #define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \
232     ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
233      && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
234
235 #define CP932_TABLE_BEGIN 0xFA
236 #define CP932_TABLE_END   0xFC
237 #define CP932INV_TABLE_BEGIN 0xED
238 #define CP932INV_TABLE_END   0xEE
239 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
240
241 #define         HOLD_SIZE       1024
242 #if defined(INT_IS_SHORT)
243 #define         IOBUF_SIZE      2048
244 #else
245 #define         IOBUF_SIZE      16384
246 #endif
247
248 #define         DEFAULT_J       'B'
249 #define         DEFAULT_R       'B'
250
251 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
252 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
253
254 #define         RANGE_NUM_MAX   18
255 #define         GETA1   0x22
256 #define         GETA2   0x2e
257
258
259 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
260 #define sizeof_euc_to_utf8_1byte 94
261 #define sizeof_euc_to_utf8_2bytes 94
262 #define sizeof_utf8_to_euc_C2 64
263 #define sizeof_utf8_to_euc_E5B8 64
264 #define sizeof_utf8_to_euc_2bytes 112
265 #define sizeof_utf8_to_euc_3bytes 16
266 #endif
267
268 /* MIME preprocessor */
269
270 #ifdef EASYWIN /*Easy Win */
271 extern POINT _BufferSize;
272 #endif
273
274 struct input_code{ /* one entry of input_code_list: a candidate input encoding */
275     char *name; /* encoding name, e.g. "EUC-JP" */
276     nkf_char stat; /* NOTE(review): presumably the detector's current state -- confirm against the *_status functions */
277     nkf_char score; /* NOTE(review): presumably manipulated via set_code_score()/clr_code_score() -- confirm */
278     nkf_char index; /* NOTE(review): presumably the number of bytes held in buf -- confirm */
279     nkf_char buf[3]; /* small per-encoding buffer; NOTE(review): usage not visible here */
280     void (*status_func)(struct input_code *, nkf_char); /* per-character status callback; NULL for some entries */
281     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter for this encoding */
282     int _file_stat; /* NOTE(review): purpose not visible in this part of the file */
283 };
284
285 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
286
287 #ifndef PERL_XS
288 static const char *CopyRight = COPY_RIGHT;
289 #endif
290 #if !defined(PERL_XS) && !defined(WIN32DLL)
291 static  nkf_char     noconvert(FILE *f);
292 #endif
293 static  void    module_connection(void);
294 static  nkf_char     kanji_convert(FILE *f);
295 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
296 static  nkf_char     push_hold_buf(nkf_char c2);
297 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
298 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
299 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
300 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
301 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
302 /* UCS Mapping
303  * 0: Shift_JIS, eucJP-ascii
304  * 1: eucJP-ms
305  * 2: CP932, CP51932
306  * 3: CP10001
307  */
308 #define UCS_MAP_ASCII   0
309 #define UCS_MAP_MS      1
310 #define UCS_MAP_CP932   2
311 #define UCS_MAP_CP10001 3
312 static int ms_ucs_map_f = UCS_MAP_ASCII;
313 #endif
314 #ifdef UTF8_INPUT_ENABLE
315 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
316 static  int     no_cp932ext_f = FALSE;
317 /* ignore ZERO WIDTH NO-BREAK SPACE */
318 static  int     no_best_fit_chars_f = FALSE;
319 static  int     input_endian = ENDIAN_BIG;
320 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
321 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
322 static  void    encode_fallback_html(nkf_char c);
323 static  void    encode_fallback_xml(nkf_char c);
324 static  void    encode_fallback_java(nkf_char c);
325 static  void    encode_fallback_perl(nkf_char c);
326 static  void    encode_fallback_subchar(nkf_char c);
327 static  void    (*encode_fallback)(nkf_char c) = NULL;
328 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
329 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
330 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
331 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
332 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
333 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
334 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
335 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
336 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
337 static  void    w_status(struct input_code *, nkf_char);
338 #endif
339 #ifdef UTF8_OUTPUT_ENABLE
340 static  int     output_bom_f = FALSE;
341 static  int     output_endian = ENDIAN_BIG;
342 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
343 static  void    w_oconv(nkf_char c2,nkf_char c1);
344 static  void    w_oconv16(nkf_char c2,nkf_char c1);
345 static  void    w_oconv32(nkf_char c2,nkf_char c1);
346 #endif
347 static  void    e_oconv(nkf_char c2,nkf_char c1);
348 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
349 static  void    s_oconv(nkf_char c2,nkf_char c1);
350 static  void    j_oconv(nkf_char c2,nkf_char c1);
351 static  void    fold_conv(nkf_char c2,nkf_char c1);
352 static  void    nl_conv(nkf_char c2,nkf_char c1);
353 static  void    z_conv(nkf_char c2,nkf_char c1);
354 static  void    rot_conv(nkf_char c2,nkf_char c1);
355 static  void    hira_conv(nkf_char c2,nkf_char c1);
356 static  void    base64_conv(nkf_char c2,nkf_char c1);
357 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
358 static  void    no_connection(nkf_char c2,nkf_char c1);
359 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
360
361 static  void    code_score(struct input_code *ptr);
362 static  void    code_status(nkf_char c);
363
364 static  void    std_putc(nkf_char c);
365 static  nkf_char     std_getc(FILE *f);
366 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
367
368 static  nkf_char     broken_getc(FILE *f);
369 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
370
371 static  nkf_char     mime_begin(FILE *f);
372 static  nkf_char     mime_getc(FILE *f);
373 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
374
375 static  void    switch_mime_getc(void);
376 static  void    unswitch_mime_getc(void);
377 static  nkf_char     mime_begin_strict(FILE *f);
378 static  nkf_char     mime_getc_buf(FILE *f);
379 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
380 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
381
382 static  nkf_char     base64decode(nkf_char c);
383 static  void    mime_prechar(nkf_char c2, nkf_char c1);
384 static  void    mime_putc(nkf_char c);
385 static  void    open_mime(nkf_char c);
386 static  void    close_mime(void);
387 static  void    eof_mime(void);
388 static  void    mimeout_addchar(nkf_char c);
389 #ifndef PERL_XS
390 static  void    usage(void);
391 static  void    version(void);
392 #endif
393 static  void    options(unsigned char *c);
394 static  void    reinit(void);
395
396 /* buffers */
397
398 #if !defined(PERL_XS) && !defined(WIN32DLL)
399 static unsigned char   stdibuf[IOBUF_SIZE];
400 static unsigned char   stdobuf[IOBUF_SIZE];
401 #endif
402 static unsigned char   hold_buf[HOLD_SIZE*2];
403 static int             hold_count = 0;
404
405 /* MIME preprocessor fifo */
406
407 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
408 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
409 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
410 static unsigned char           mime_buf[MIME_BUF_SIZE];
411 static unsigned int            mime_top = 0;
412 static unsigned int            mime_last = 0;  /* decoded */
413 static unsigned int            mime_input = 0; /* undecoded */
414 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
415
416 /* flags */
417 static int             unbuf_f = FALSE;
418 static int             estab_f = FALSE;
419 static int             nop_f = FALSE;
420 static int             binmode_f = TRUE;       /* binary mode */
421 static int             rot_f = FALSE;          /* rot14/43 mode */
422 static int             hira_f = FALSE;          /* hira/kata henkan */
423 static int             input_f = FALSE;        /* non fixed input code  */
424 static int             alpha_f = FALSE;        /* convert JISX0208 alphabet to ASCII */
425 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
426 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
427 static int             mimebuf_f = FALSE;      /* MIME buffered input */
428 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
429 static int             iso8859_f = FALSE;      /* ISO8859 through */
430 static int             mimeout_f = FALSE;       /* base64 mode */
431 #if defined(MSDOS) || defined(__OS2__)
432 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
433 #else
434 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
435 #endif
436 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
437
438 #ifdef UNICODE_NORMALIZATION
439 static int nfc_f = FALSE;
440 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
441 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
442 static nkf_char nfc_getc(FILE *f);
443 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
444 #endif
445
446 #ifdef INPUT_OPTION
447 static int cap_f = FALSE;
448 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
449 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
450 static nkf_char cap_getc(FILE *f);
451 static nkf_char cap_ungetc(nkf_char c,FILE *f);
452
453 static int url_f = FALSE;
454 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
455 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
456 static nkf_char url_getc(FILE *f);
457 static nkf_char url_ungetc(nkf_char c,FILE *f);
458 #endif
459
460 #if defined(INT_IS_SHORT)
461 #define NKF_INT32_C(n)   (n##L)
462 #else
463 #define NKF_INT32_C(n)   (n)
464 #endif
465 #define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
466 #define CLASS_MASK      NKF_INT32_C(0xFF000000)
467 #define CLASS_UNICODE   NKF_INT32_C(0x01000000)
468 #define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
469 #define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
470 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UNICODE)
471 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
472
473 #ifdef NUMCHAR_OPTION
474 static int numchar_f = FALSE;
475 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc */
476 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
477 static nkf_char numchar_getc(FILE *f);
478 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
479 #endif
480
481 #ifdef CHECK_OPTION
482 static int noout_f = FALSE;
483 static void no_putc(nkf_char c);
484 static int debug_f = FALSE;
485 static void debug(const char *str);
486 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
487 #endif
488
489 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
490 #if !defined PERL_XS
491 static  void    print_guessed_code(char *filename);
492 #endif
493 static  void    set_input_codename(char *codename);
494
495 #ifdef EXEC_IO
496 static int exec_f = 0;
497 #endif
498
499 #ifdef SHIFTJIS_CP932
500 /* invert IBM extended characters to others */
501 static int cp51932_f = FALSE;
502
503 /* invert NEC-selected IBM extended characters to IBM extended characters */
504 static int cp932inv_f = TRUE;
505
506 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
507 #endif /* SHIFTJIS_CP932 */
508
509 #ifdef X0212_ENABLE
510 static int x0212_f = FALSE;
511 static nkf_char x0212_shift(nkf_char c);
512 static nkf_char x0212_unshift(nkf_char c);
513 #endif
514 static int x0213_f = FALSE;
515
516 static unsigned char prefix_table[256];
517
518 static void set_code_score(struct input_code *ptr, nkf_char score);
519 static void clr_code_score(struct input_code *ptr, nkf_char score);
520 static void status_disable(struct input_code *ptr);
521 static void status_push_ch(struct input_code *ptr, nkf_char c);
522 static void status_clear(struct input_code *ptr);
523 static void status_reset(struct input_code *ptr);
524 static void status_reinit(struct input_code *ptr);
525 static void status_check(struct input_code *ptr, nkf_char c);
526 static void e_status(struct input_code *, nkf_char);
527 static void s_status(struct input_code *, nkf_char);
528
529 struct input_code input_code_list[] = { /* table of candidate input encodings */
530     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
531     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
532 #ifdef UTF8_INPUT_ENABLE
533     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
534     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0}, /* no status_func */
535     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0}, /* no status_func */
536 #endif
537     {0} /* all-zero entry (name == NULL); presumably the list terminator */
538 };
539
540 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
541 static int              base64_count = 0;
542
543 /* X0208 -> ASCII converter */
544
545 /* fold parameter */
546 static int             f_line = 0;    /* chars in line */
547 static int             f_prev = 0;
548 static int             fold_preserve_f = FALSE; /* preserve new lines */
549 static int             fold_f  = FALSE;
550 static int             fold_len  = 0;
551
552 /* options */
553 static unsigned char   kanji_intro = DEFAULT_J;
554 static unsigned char   ascii_intro = DEFAULT_R;
555
556 /* Folding */
557
558 #define FOLD_MARGIN  10
559 #define DEFAULT_FOLD 60
560
561 static int             fold_margin  = FOLD_MARGIN;
562
563 /* converters */
564
565 #ifdef DEFAULT_CODE_JIS
566 #   define  DEFAULT_CONV j_oconv
567 #endif
568 #ifdef DEFAULT_CODE_SJIS
569 #   define  DEFAULT_CONV s_oconv
570 #endif
571 #ifdef DEFAULT_CODE_EUC
572 #   define  DEFAULT_CONV e_oconv
573 #endif
574 #ifdef DEFAULT_CODE_UTF8
575 #   define  DEFAULT_CONV w_oconv
576 #endif
577
578 /* process default */
579 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
580
581 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
582 /* s_iconv or oconv */
583 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
584
585 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
587 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
588 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
589 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
590 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
591 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
592
593 /* static redirections */
594
595 static  void   (*o_putc)(nkf_char c) = std_putc;
596
597 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
598 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
599
600 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc */
601 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
602
603 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
604
605 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
606 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
607
608 /* for strict mime */
609 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
610 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
611
612 /* Global states */
613 static int output_mode = ASCII,    /* output kanji mode */
614            input_mode =  ASCII,    /* input kanji mode */
615            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
616 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
617
618 /* X0201 / X0208 conversion tables */
619
620 /* X0201 kana conversion table (plain kana, no voicing mark) */
621 /* 90-9F A0-DF; NOTE(review): entries look like two-byte pairs ending with a 0x00,0x00 pair -- confirm against the lookup code */
622 static const unsigned char cv[]= {
623     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
624     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
625     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
626     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
627     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
628     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
629     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
630     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
631     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
632     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
633     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
634     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
635     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
636     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
637     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
638     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
639     0x00,0x00};
640
641
642 /* X0201 kana conversion table for dakuten (voiced sound mark) */
643 /* 90-9F A0-DF; same layout as cv; NOTE(review): 0x00,0x00 presumably means no voiced form -- confirm */
644 static const unsigned char dv[]= {
645     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
647     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
648     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
649     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
650     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
651     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
652     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
653     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
654     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
656     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
657     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
660     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
661     0x00,0x00};
662
663 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
664 /* 90-9F A0-DF; same layout as cv; NOTE(review): 0x00,0x00 presumably means no semi-voiced form -- confirm */
665 static const unsigned char ev[]= {
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
677     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
678     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
679     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
680     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
681     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
682     0x00,0x00};
683
684
685 /* X0208 kigou (symbol) conversion table; nonzero entries are byte values in the ASCII range */
686 /* 0x8140 - 0x819e; NOTE(review): 0x00 presumably marks a symbol with no ASCII equivalent -- confirm */
687 static const unsigned char fv[] = {
688
689     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
690     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
691     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
692     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
693     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
694     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
695     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
696     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
697     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
698     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
699     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
700     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
701 } ;
702
703
704
705 static int             file_out_f = FALSE;
706 #ifdef OVERWRITE
707 static int             overwrite_f = FALSE;
708 static int             preserve_time_f = FALSE;
709 static int             backup_f = FALSE;
710 static char            *backup_suffix = "";
711 static char *get_backup_filename(const char *suffix, const char *filename);
712 #endif
713
714 static int nlmode_f = 0;   /* CR, LF, CRLF */
715 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
716 static nkf_char prev_cr = 0; /* CR or 0 */
717 #ifdef EASYWIN /*Easy Win */
718 static int             end_check;
719 #endif /*Easy Win */
720
721 #define STD_GC_BUFSIZE (256)
722 nkf_char std_gc_buf[STD_GC_BUFSIZE];
723 nkf_char std_gc_ndx;
724
725 #ifdef WIN32DLL
726 #include "nkf32dll.c"
727 #elif defined(PERL_XS)
728 #else /* WIN32DLL */
729 int main(int argc, char **argv)
730 {
731     FILE  *fin;
732     unsigned char  *cp;
733
734     char *outfname = NULL;
735     char *origfname;
736
737 #ifdef EASYWIN /*Easy Win */
738     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
739 #endif
740
741     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
742         cp = (unsigned char *)*argv;
743         options(cp);
744         if (guess_f) {
745 #ifdef CHECK_OPTION
746             int debug_f_back = debug_f;
747 #endif
748 #ifdef EXEC_IO
749             int exec_f_back = exec_f;
750 #endif
751 #ifdef X0212_ENABLE
752             int x0212_f_back = x0212_f;
753 #endif
754 #ifdef X0212_ENABLE
755             int x0213_f_back = x0213_f;
756 #endif
757             int guess_f_back = guess_f;
758             reinit();
759             guess_f = guess_f_back;
760             mime_f = FALSE;
761 #ifdef CHECK_OPTION
762             debug_f = debug_f_back;
763 #endif
764 #ifdef EXEC_IO
765             exec_f = exec_f_back;
766 #endif
767 #ifdef X0212_ENABLE
768             x0212_f = x0212_f_back;
769 #endif
770 #ifdef X0213_ENABLE
771             x0213_f = x0213_f_back;
772 #endif
773         }
774 #ifdef EXEC_IO
775         if (exec_f){
776             int fds[2], pid;
777             if (pipe(fds) < 0 || (pid = fork()) < 0){
778                 abort();
779             }
780             if (pid == 0){
781                 if (exec_f > 0){
782                     close(fds[0]);
783                     dup2(fds[1], 1);
784                 }else{
785                     close(fds[1]);
786                     dup2(fds[0], 0);
787                 }
788                 execvp(argv[1], &argv[1]);
789             }
790             if (exec_f > 0){
791                 close(fds[1]);
792                 dup2(fds[0], 0);
793             }else{
794                 close(fds[0]);
795                 dup2(fds[1], 1);
796             }
797             argc = 0;
798             break;
799         }
800 #endif
801     }
802     if(x0201_f == WISH_TRUE)
803          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
804
805     if (binmode_f == TRUE)
806 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
807     if (freopen("","wb",stdout) == NULL)
808         return (-1);
809 #else
810     setbinmode(stdout);
811 #endif
812
813     if (unbuf_f)
814       setbuf(stdout, (char *) NULL);
815     else
816       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
817
818     if (argc == 0) {
819       if (binmode_f == TRUE)
820 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
821       if (freopen("","rb",stdin) == NULL) return (-1);
822 #else
823       setbinmode(stdin);
824 #endif
825       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
826       if (nop_f)
827           noconvert(stdin);
828       else {
829           kanji_convert(stdin);
830           if (guess_f) print_guessed_code(NULL);
831       }
832     } else {
833       int nfiles = argc;
834         int is_argument_error = FALSE;
835       while (argc--) {
836             input_codename = NULL;
837             input_newline = 0;
838 #ifdef CHECK_OPTION
839             iconv_for_check = 0;
840 #endif
841           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
842               perror(*--argv);
843                 *argv++;
844                 is_argument_error = TRUE;
845                 continue;
846           } else {
847 #ifdef OVERWRITE
848               int fd = 0;
849               int fd_backup = 0;
850 #endif
851
852 /* reopen file for stdout */
853               if (file_out_f == TRUE) {
854 #ifdef OVERWRITE
855                   if (overwrite_f){
856                       outfname = malloc(strlen(origfname)
857                                         + strlen(".nkftmpXXXXXX")
858                                         + 1);
859                       if (!outfname){
860                           perror(origfname);
861                           return -1;
862                       }
863                       strcpy(outfname, origfname);
864 #ifdef MSDOS
865                       {
866                           int i;
867                           for (i = strlen(outfname); i; --i){
868                               if (outfname[i - 1] == '/'
869                                   || outfname[i - 1] == '\\'){
870                                   break;
871                               }
872                           }
873                           outfname[i] = '\0';
874                       }
875                       strcat(outfname, "ntXXXXXX");
876                       mktemp(outfname);
877                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
878                                 S_IREAD | S_IWRITE);
879 #else
880                       strcat(outfname, ".nkftmpXXXXXX");
881                       fd = mkstemp(outfname);
882 #endif
883                       if (fd < 0
884                           || (fd_backup = dup(fileno(stdout))) < 0
885                           || dup2(fd, fileno(stdout)) < 0
886                           ){
887                           perror(origfname);
888                           return -1;
889                       }
890                   }else
891 #endif
892                   if(argc == 1) {
893                       outfname = *argv++;
894                       argc--;
895                   } else {
896                       outfname = "nkf.out";
897                   }
898
899                   if(freopen(outfname, "w", stdout) == NULL) {
900                       perror (outfname);
901                       return (-1);
902                   }
903                   if (binmode_f == TRUE) {
904 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
905                       if (freopen("","wb",stdout) == NULL)
906                            return (-1);
907 #else
908                       setbinmode(stdout);
909 #endif
910                   }
911               }
912               if (binmode_f == TRUE)
913 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
914                  if (freopen("","rb",fin) == NULL)
915                     return (-1);
916 #else
917                  setbinmode(fin);
918 #endif
919               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
920               if (nop_f)
921                   noconvert(fin);
922               else {
923                   char *filename = NULL;
924                   kanji_convert(fin);
925                   if (nfiles > 1) filename = origfname;
926                   if (guess_f) print_guessed_code(filename);
927               }
928               fclose(fin);
929 #ifdef OVERWRITE
930               if (overwrite_f) {
931                   struct stat     sb;
932 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
933                   time_t tb[2];
934 #else
935                   struct utimbuf  tb;
936 #endif
937
938                   fflush(stdout);
939                   close(fd);
940                   if (dup2(fd_backup, fileno(stdout)) < 0){
941                       perror("dup2");
942                   }
943                   if (stat(origfname, &sb)) {
944                       fprintf(stderr, "Can't stat %s\n", origfname);
945                   }
946                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
947                   if (chmod(outfname, sb.st_mode)) {
948                       fprintf(stderr, "Can't set permission %s\n", outfname);
949                   }
950
951                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
952                     if(preserve_time_f){
953 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
954                         tb[0] = tb[1] = sb.st_mtime;
955                         if (utime(outfname, tb)) {
956                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
957                         }
958 #else
959                         tb.actime  = sb.st_atime;
960                         tb.modtime = sb.st_mtime;
961                         if (utime(outfname, &tb)) {
962                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
963                         }
964 #endif
965                     }
966                     if(backup_f){
967                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
968 #ifdef MSDOS
969                         unlink(backup_filename);
970 #endif
971                         if (rename(origfname, backup_filename)) {
972                             perror(backup_filename);
973                             fprintf(stderr, "Can't rename %s to %s\n",
974                                     origfname, backup_filename);
975                         }
976                     }else{
977 #ifdef MSDOS
978                         if (unlink(origfname)){
979                             perror(origfname);
980                         }
981 #endif
982                     }
983                   if (rename(outfname, origfname)) {
984                       perror(origfname);
985                       fprintf(stderr, "Can't rename %s to %s\n",
986                               outfname, origfname);
987                   }
988                   free(outfname);
989               }
990 #endif
991           }
992       }
993         if (is_argument_error)
994             return(-1);
995     }
996 #ifdef EASYWIN /*Easy Win */
997     if (file_out_f == FALSE)
998         scanf("%d",&end_check);
999     else
1000         fclose(stdout);
1001 #else /* for Other OS */
1002     if (file_out_f == TRUE)
1003         fclose(stdout);
1004 #endif /*Easy Win */
1005     return (0);
1006 }
1007 #endif /* WIN32DLL */
1008
1009 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename` from the pattern `suffix`.
 *
 *  - If `suffix` contains one or more '*' characters, every '*' is replaced
 *    by `filename` and all other characters of `suffix` are copied as-is
 *    (e.g. suffix "bak/*~" and filename "x" gives "bak/x~").
 *  - Otherwise `suffix` is simply appended to `filename`.
 *
 * Returns a freshly malloc()ed, NUL-terminated string, or NULL (after
 * reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t result_length;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    /*
     * Each '*' expands to the whole file name; with no '*' the suffix is
     * appended.  Compute the final length up front so that one checked
     * allocation serves both forms.
     */
    if (asterisk_count) {
        result_length = (suffix_length - asterisk_count)
                      + asterisk_count * filename_length;
    } else {
        result_length = filename_length + suffix_length;
    }

    backup_filename = malloc(result_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                /* strcpy also writes a NUL, which the next write overwrites */
                strcpy(backup_filename + j, filename);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        strcpy(backup_filename, filename);
        strcpy(backup_filename + filename_length, suffix);
    }
    return backup_filename;
}
1048 #endif
1049
/*
 * Long ("--name") option table consulted by options().
 *
 *   name  : option text that follows "--".  A trailing '=' marks an option
 *           that carries a value; options() stops comparing at the '=' and
 *           takes the text after it as the value.
 *   alias : when non-empty, the string of short option letters that
 *           options() processes in place of the long option.  When empty,
 *           options() handles the option itself by comparing `name`.
 *
 * options() scans the table from the top and stops at the first match,
 * so the order of the entries is kept as it is.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",               ""    },
    { "oc=",               ""    },
    { "base64",            "jMB" },
    { "euc",               "e"   },
    { "euc-input",         "E"   },
    { "fj",                "jm"  },
    { "help",              "v"   },
    { "jis",               "j"   },
    { "jis-input",         "J"   },
    { "mac",               "sLm" },
    { "mime",              "jM"  },
    { "mime-input",        "m"   },
    { "msdos",             "sLw" },
    { "sjis",              "s"   },
    { "sjis-input",        "S"   },
    { "unix",              "eLu" },
    { "version",           "V"   },
    { "windows",           "sLw" },
    { "hiragana",          "h1"  },
    { "katakana",          "h2"  },
    { "katakana-hiragana", "h3"  },
    { "guess=",            ""    },
    { "guess",             "g"   },
    { "cp932",             ""    },
    { "no-cp932",          ""    },
#ifdef X0212_ENABLE
    { "x0212",             ""    },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",              "w"   },
    { "utf16",             "w16" },
    { "ms-ucs-map",        ""    },
    { "fb-skip",           ""    },
    { "fb-html",           ""    },
    { "fb-xml",            ""    },
    { "fb-perl",           ""    },
    { "fb-java",           ""    },
    { "fb-subchar",        ""    },
    { "fb-subchar=",       ""    },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",        "W"   },
    { "utf16-input",       "W16" },
    { "no-cp932ext",       ""    },
    { "no-best-fit-chars", ""    },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",     ""    },
#endif
#ifdef OVERWRITE
    { "overwrite",         ""    },
    { "overwrite=",        ""    },
    { "in-place",          ""    },
    { "in-place=",         ""    },
#endif
#ifdef INPUT_OPTION
    { "cap-input",         ""    },
    { "url-input",         ""    },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",     ""    },
#endif
#ifdef CHECK_OPTION
    { "no-output",         ""    },
    { "debug",             ""    },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",          ""    },
#endif
#ifdef EXEC_IO
    { "exec-in",           ""    },
    { "exec-out",          ""    },
#endif
    { "prefix=",           ""    },
};

/*
 * Set to 1 by options() when it meets a bare "--"; once set, options()
 * returns immediately without parsing its argument.
 */
static int option_mode = 0;
1131
1132 void options(unsigned char *cp)
1133 {
1134     nkf_char i, j;
1135     unsigned char *p;
1136     unsigned char *cp_back = NULL;
1137     char codeset[32];
1138
1139     if (option_mode==1)
1140         return;
1141     while(*cp && *cp++!='-');
1142     while (*cp || cp_back) {
1143         if(!*cp){
1144             cp = cp_back;
1145             cp_back = NULL;
1146             continue;
1147         }
1148         p = 0;
1149         switch (*cp++) {
1150         case '-':  /* literal options */
1151             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1152                 option_mode = 1;
1153                 return;
1154             }
1155             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1156                 p = (unsigned char *)long_option[i].name;
1157                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1158                 if (*p == cp[j] || cp[j] == SP){
1159                     p = &cp[j] + 1;
1160                     break;
1161                 }
1162                 p = 0;
1163             }
1164             if (p == 0) {
1165                 fprintf(stderr, "unknown long option: --%s\n", cp);
1166                 return;
1167             }
1168             while(*cp && *cp != SP && cp++);
1169             if (long_option[i].alias[0]){
1170                 cp_back = cp;
1171                 cp = (unsigned char *)long_option[i].alias;
1172             }else{
1173                 if (strcmp(long_option[i].name, "ic=") == 0){
1174                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1175                         codeset[i] = nkf_toupper(p[i]);
1176                     }
1177                     codeset[i] = 0;
1178                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1179                         input_f = JIS_INPUT;
1180                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1181                       strcmp(codeset, "CP50220") == 0 ||
1182                       strcmp(codeset, "CP50221") == 0 ||
1183                       strcmp(codeset, "CP50222") == 0){
1184                         input_f = JIS_INPUT;
1185 #ifdef SHIFTJIS_CP932
1186                         cp51932_f = TRUE;
1187 #endif
1188 #ifdef UTF8_OUTPUT_ENABLE
1189                         ms_ucs_map_f = UCS_MAP_CP932;
1190 #endif
1191                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1192                         input_f = JIS_INPUT;
1193 #ifdef X0212_ENABLE
1194                         x0212_f = TRUE;
1195 #endif
1196                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1197                         input_f = JIS_INPUT;
1198 #ifdef X0212_ENABLE
1199                         x0212_f = TRUE;
1200 #endif
1201                         x0213_f = TRUE;
1202                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1203                         input_f = SJIS_INPUT;
1204                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1205                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1206                              strcmp(codeset, "CP932") == 0 ||
1207                              strcmp(codeset, "MS932") == 0){
1208                         input_f = SJIS_INPUT;
1209 #ifdef SHIFTJIS_CP932
1210                         cp51932_f = TRUE;
1211 #endif
1212 #ifdef UTF8_OUTPUT_ENABLE
1213                         ms_ucs_map_f = UCS_MAP_CP932;
1214 #endif
1215                     }else if(strcmp(codeset, "CP10001") == 0){
1216                         input_f = SJIS_INPUT;
1217 #ifdef SHIFTJIS_CP932
1218                         cp51932_f = TRUE;
1219 #endif
1220 #ifdef UTF8_OUTPUT_ENABLE
1221                         ms_ucs_map_f = UCS_MAP_CP10001;
1222 #endif
1223                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1224                              strcmp(codeset, "EUC-JP") == 0){
1225                         input_f = EUC_INPUT;
1226                     }else if(strcmp(codeset, "CP51932") == 0){
1227                         input_f = EUC_INPUT;
1228 #ifdef SHIFTJIS_CP932
1229                         cp51932_f = TRUE;
1230 #endif
1231 #ifdef UTF8_OUTPUT_ENABLE
1232                         ms_ucs_map_f = UCS_MAP_CP932;
1233 #endif
1234                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1235                              strcmp(codeset, "EUCJP-MS") == 0 ||
1236                              strcmp(codeset, "EUCJPMS") == 0){
1237                         input_f = EUC_INPUT;
1238 #ifdef SHIFTJIS_CP932
1239                         cp51932_f = FALSE;
1240 #endif
1241 #ifdef UTF8_OUTPUT_ENABLE
1242                         ms_ucs_map_f = UCS_MAP_MS;
1243 #endif
1244                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1245                              strcmp(codeset, "EUCJP-ASCII") == 0){
1246                         input_f = EUC_INPUT;
1247 #ifdef SHIFTJIS_CP932
1248                         cp51932_f = FALSE;
1249 #endif
1250 #ifdef UTF8_OUTPUT_ENABLE
1251                         ms_ucs_map_f = UCS_MAP_ASCII;
1252 #endif
1253                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1254                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1255                         input_f = SJIS_INPUT;
1256                         x0213_f = TRUE;
1257 #ifdef SHIFTJIS_CP932
1258                         cp51932_f = FALSE;
1259 #endif
1260                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1261                              strcmp(codeset, "EUC-JIS-2004") == 0){
1262                         input_f = EUC_INPUT;
1263                         x0213_f = TRUE;
1264 #ifdef SHIFTJIS_CP932
1265                         cp51932_f = FALSE;
1266 #endif
1267 #ifdef UTF8_INPUT_ENABLE
1268                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1269                              strcmp(codeset, "UTF-8N") == 0 ||
1270                              strcmp(codeset, "UTF-8-BOM") == 0){
1271                         input_f = UTF8_INPUT;
1272 #ifdef UNICODE_NORMALIZATION
1273                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1274                              strcmp(codeset, "UTF-8-MAC") == 0){
1275                         input_f = UTF8_INPUT;
1276                         nfc_f = TRUE;
1277 #endif
1278                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1279                              strcmp(codeset, "UTF-16BE") == 0 ||
1280                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1281                         input_f = UTF16_INPUT;
1282                         input_endian = ENDIAN_BIG;
1283                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1284                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1285                         input_f = UTF16_INPUT;
1286                         input_endian = ENDIAN_LITTLE;
1287                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1288                              strcmp(codeset, "UTF-32BE") == 0 ||
1289                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1290                         input_f = UTF32_INPUT;
1291                         input_endian = ENDIAN_BIG;
1292                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1293                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1294                         input_f = UTF32_INPUT;
1295                         input_endian = ENDIAN_LITTLE;
1296 #endif
1297                     } else {
1298                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1299                     }
1300                     continue;
1301                 }
1302                 if (strcmp(long_option[i].name, "oc=") == 0){
1303                     x0201_f = FALSE;
1304                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1305                         codeset[i] = nkf_toupper(p[i]);
1306                     }
1307                     codeset[i] = 0;
1308                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1309                         output_conv = j_oconv;
1310                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1311                         output_conv = j_oconv;
1312                         no_cp932ext_f = TRUE;
1313 #ifdef SHIFTJIS_CP932
1314                         cp932inv_f = FALSE;
1315 #endif
1316 #ifdef UTF8_OUTPUT_ENABLE
1317                         ms_ucs_map_f = UCS_MAP_CP932;
1318 #endif
1319                     }else if(strcmp(codeset, "CP50220") == 0){
1320                         output_conv = j_oconv;
1321                         x0201_f = TRUE;
1322 #ifdef SHIFTJIS_CP932
1323                         cp932inv_f = FALSE;
1324 #endif
1325 #ifdef UTF8_OUTPUT_ENABLE
1326                         ms_ucs_map_f = UCS_MAP_CP932;
1327 #endif
1328                     }else if(strcmp(codeset, "CP50221") == 0){
1329                         output_conv = j_oconv;
1330 #ifdef SHIFTJIS_CP932
1331                         cp932inv_f = FALSE;
1332 #endif
1333 #ifdef UTF8_OUTPUT_ENABLE
1334                         ms_ucs_map_f = UCS_MAP_CP932;
1335 #endif
1336                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1337                         output_conv = j_oconv;
1338 #ifdef X0212_ENABLE
1339                         x0212_f = TRUE;
1340 #endif
1341 #ifdef SHIFTJIS_CP932
1342                         cp932inv_f = FALSE;
1343 #endif
1344                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1345                         output_conv = j_oconv;
1346 #ifdef X0212_ENABLE
1347                         x0212_f = TRUE;
1348 #endif
1349                         x0213_f = TRUE;
1350 #ifdef SHIFTJIS_CP932
1351                         cp932inv_f = FALSE;
1352 #endif
1353                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1354                         output_conv = s_oconv;
1355                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1356                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1357                              strcmp(codeset, "CP932") == 0 ||
1358                              strcmp(codeset, "MS932") == 0){
1359                         output_conv = s_oconv;
1360 #ifdef UTF8_OUTPUT_ENABLE
1361                         ms_ucs_map_f = UCS_MAP_CP932;
1362 #endif
1363                     }else if(strcmp(codeset, "CP10001") == 0){
1364                         output_conv = s_oconv;
1365 #ifdef UTF8_OUTPUT_ENABLE
1366                         ms_ucs_map_f = UCS_MAP_CP10001;
1367 #endif
1368                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1369                              strcmp(codeset, "EUC-JP") == 0){
1370                         output_conv = e_oconv;
1371                     }else if(strcmp(codeset, "CP51932") == 0){
1372                         output_conv = e_oconv;
1373 #ifdef SHIFTJIS_CP932
1374                         cp932inv_f = FALSE;
1375 #endif
1376 #ifdef UTF8_OUTPUT_ENABLE
1377                         ms_ucs_map_f = UCS_MAP_CP932;
1378 #endif
1379                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1380                              strcmp(codeset, "EUCJP-MS") == 0 ||
1381                              strcmp(codeset, "EUCJPMS") == 0){
1382                         output_conv = e_oconv;
1383 #ifdef X0212_ENABLE
1384                         x0212_f = TRUE;
1385 #endif
1386 #ifdef UTF8_OUTPUT_ENABLE
1387                         ms_ucs_map_f = UCS_MAP_MS;
1388 #endif
1389                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1390                              strcmp(codeset, "EUCJP-ASCII") == 0){
1391                         output_conv = e_oconv;
1392 #ifdef X0212_ENABLE
1393                         x0212_f = TRUE;
1394 #endif
1395 #ifdef UTF8_OUTPUT_ENABLE
1396                         ms_ucs_map_f = UCS_MAP_ASCII;
1397 #endif
1398                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1399                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1400                         output_conv = s_oconv;
1401                         x0213_f = TRUE;
1402 #ifdef SHIFTJIS_CP932
1403                         cp932inv_f = FALSE;
1404 #endif
1405                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1406                              strcmp(codeset, "EUC-JIS-2004") == 0){
1407                         output_conv = e_oconv;
1408 #ifdef X0212_ENABLE
1409                         x0212_f = TRUE;
1410 #endif
1411                         x0213_f = TRUE;
1412 #ifdef SHIFTJIS_CP932
1413                         cp932inv_f = FALSE;
1414 #endif
1415 #ifdef UTF8_OUTPUT_ENABLE
1416                     }else if(strcmp(codeset, "UTF-8") == 0){
1417                         output_conv = w_oconv;
1418                     }else if(strcmp(codeset, "UTF-8N") == 0){
1419                         output_conv = w_oconv;
1420                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1421                         output_conv = w_oconv;
1422                         output_bom_f = TRUE;
1423                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1424                         output_conv = w_oconv16;
1425                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1426                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1427                         output_conv = w_oconv16;
1428                         output_bom_f = TRUE;
1429                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1430                         output_conv = w_oconv16;
1431                         output_endian = ENDIAN_LITTLE;
1432                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1433                         output_conv = w_oconv16;
1434                         output_endian = ENDIAN_LITTLE;
1435                         output_bom_f = TRUE;
1436                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1437                              strcmp(codeset, "UTF-32BE") == 0){
1438                         output_conv = w_oconv32;
1439                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1440                         output_conv = w_oconv32;
1441                         output_bom_f = TRUE;
1442                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1443                         output_conv = w_oconv32;
1444                         output_endian = ENDIAN_LITTLE;
1445                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1446                         output_conv = w_oconv32;
1447                         output_endian = ENDIAN_LITTLE;
1448                         output_bom_f = TRUE;
1449 #endif
1450                     } else {
1451                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1452                     }
1453                     continue;
1454                 }
1455                 if (strcmp(long_option[i].name, "guess=") == 0){
1456                     if (p[0] == '1') {
1457                         guess_f = 2;
1458                     } else {
1459                         guess_f = 1;
1460                     }
1461                     continue;
1462                 }
1463 #ifdef OVERWRITE
1464                 if (strcmp(long_option[i].name, "overwrite") == 0){
1465                     file_out_f = TRUE;
1466                     overwrite_f = TRUE;
1467                     preserve_time_f = TRUE;
1468                     continue;
1469                 }
1470                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1471                     file_out_f = TRUE;
1472                     overwrite_f = TRUE;
1473                     preserve_time_f = TRUE;
1474                     backup_f = TRUE;
1475                     backup_suffix = malloc(strlen((char *) p) + 1);
1476                     strcpy(backup_suffix, (char *) p);
1477                     continue;
1478                 }
1479                 if (strcmp(long_option[i].name, "in-place") == 0){
1480                     file_out_f = TRUE;
1481                     overwrite_f = TRUE;
1482                     preserve_time_f = FALSE;
1483                     continue;
1484                 }
1485                 if (strcmp(long_option[i].name, "in-place=") == 0){
1486                     file_out_f = TRUE;
1487                     overwrite_f = TRUE;
1488                     preserve_time_f = FALSE;
1489                     backup_f = TRUE;
1490                     backup_suffix = malloc(strlen((char *) p) + 1);
1491                     strcpy(backup_suffix, (char *) p);
1492                     continue;
1493                 }
1494 #endif
1495 #ifdef INPUT_OPTION
1496                 if (strcmp(long_option[i].name, "cap-input") == 0){
1497                     cap_f = TRUE;
1498                     continue;
1499                 }
1500                 if (strcmp(long_option[i].name, "url-input") == 0){
1501                     url_f = TRUE;
1502                     continue;
1503                 }
1504 #endif
1505 #ifdef NUMCHAR_OPTION
1506                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1507                     numchar_f = TRUE;
1508                     continue;
1509                 }
1510 #endif
1511 #ifdef CHECK_OPTION
1512                 if (strcmp(long_option[i].name, "no-output") == 0){
1513                     noout_f = TRUE;
1514                     continue;
1515                 }
1516                 if (strcmp(long_option[i].name, "debug") == 0){
1517                     debug_f = TRUE;
1518                     continue;
1519                 }
1520 #endif
1521                 if (strcmp(long_option[i].name, "cp932") == 0){
1522 #ifdef SHIFTJIS_CP932
1523                     cp51932_f = TRUE;
1524                     cp932inv_f = TRUE;
1525 #endif
1526 #ifdef UTF8_OUTPUT_ENABLE
1527                     ms_ucs_map_f = UCS_MAP_CP932;
1528 #endif
1529                     continue;
1530                 }
1531                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1532 #ifdef SHIFTJIS_CP932
1533                     cp51932_f = FALSE;
1534                     cp932inv_f = FALSE;
1535 #endif
1536 #ifdef UTF8_OUTPUT_ENABLE
1537                     ms_ucs_map_f = UCS_MAP_ASCII;
1538 #endif
1539                     continue;
1540                 }
1541 #ifdef SHIFTJIS_CP932
1542                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1543                     cp932inv_f = TRUE;
1544                     continue;
1545                 }
1546 #endif
1547
1548 #ifdef X0212_ENABLE
1549                 if (strcmp(long_option[i].name, "x0212") == 0){
1550                     x0212_f = TRUE;
1551                     continue;
1552                 }
1553 #endif
1554
1555 #ifdef EXEC_IO
1556                   if (strcmp(long_option[i].name, "exec-in") == 0){
1557                       exec_f = 1;
1558                       return;
1559                   }
1560                   if (strcmp(long_option[i].name, "exec-out") == 0){
1561                       exec_f = -1;
1562                       return;
1563                   }
1564 #endif
1565 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1566                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1567                     no_cp932ext_f = TRUE;
1568                     continue;
1569                 }
1570                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1571                     no_best_fit_chars_f = TRUE;
1572                     continue;
1573                 }
1574                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1575                     encode_fallback = NULL;
1576                     continue;
1577                 }
1578                 if (strcmp(long_option[i].name, "fb-html") == 0){
1579                     encode_fallback = encode_fallback_html;
1580                     continue;
1581                 }
1582                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1583                     encode_fallback = encode_fallback_xml;
1584                     continue;
1585                 }
1586                 if (strcmp(long_option[i].name, "fb-java") == 0){
1587                     encode_fallback = encode_fallback_java;
1588                     continue;
1589                 }
1590                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1591                     encode_fallback = encode_fallback_perl;
1592                     continue;
1593                 }
1594                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1595                     encode_fallback = encode_fallback_subchar;
1596                     continue;
1597                 }
1598                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1599                     encode_fallback = encode_fallback_subchar;
1600                     unicode_subchar = 0;
1601                     if (p[0] != '0'){
1602                         /* decimal number */
1603                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1604                             unicode_subchar *= 10;
1605                             unicode_subchar += hex2bin(p[i]);
1606                         }
1607                     }else if(p[1] == 'x' || p[1] == 'X'){
1608                         /* hexadecimal number */
1609                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1610                             unicode_subchar <<= 4;
1611                             unicode_subchar |= hex2bin(p[i]);
1612                         }
1613                     }else{
1614                         /* octal number */
1615                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1616                             unicode_subchar *= 8;
1617                             unicode_subchar += hex2bin(p[i]);
1618                         }
1619                     }
1620                     w16e_conv(unicode_subchar, &i, &j);
1621                     unicode_subchar = i<<8 | j;
1622                     continue;
1623                 }
1624 #endif
1625 #ifdef UTF8_OUTPUT_ENABLE
1626                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1627                     ms_ucs_map_f = UCS_MAP_MS;
1628                     continue;
1629                 }
1630 #endif
1631 #ifdef UNICODE_NORMALIZATION
1632                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1633                     input_f = UTF8_INPUT;
1634                     nfc_f = TRUE;
1635                     continue;
1636                 }
1637 #endif
1638                 if (strcmp(long_option[i].name, "prefix=") == 0){
1639                     if (nkf_isgraph(p[0])){
1640                         for (i = 1; nkf_isgraph(p[i]); i++){
1641                             prefix_table[p[i]] = p[0];
1642                         }
1643                     }
1644                     continue;
1645                 }
1646             }
1647             continue;
1648         case 'b':           /* buffered mode */
1649             unbuf_f = FALSE;
1650             continue;
1651         case 'u':           /* non bufferd mode */
1652             unbuf_f = TRUE;
1653             continue;
1654         case 't':           /* transparent mode */
1655             if (*cp=='1') {
1656                 /* alias of -t */
1657                 nop_f = TRUE;
1658                 *cp++;
1659             } else if (*cp=='2') {
1660                 /*
1661                  * -t with put/get
1662                  *
1663                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1664                  *
1665                  */
1666                 nop_f = 2;
1667                 *cp++;
1668             } else
1669                 nop_f = TRUE;
1670             continue;
1671         case 'j':           /* JIS output */
1672         case 'n':
1673             output_conv = j_oconv;
1674             continue;
1675         case 'e':           /* AT&T EUC output */
1676             output_conv = e_oconv;
1677             cp932inv_f = FALSE;
1678             continue;
1679         case 's':           /* SJIS output */
1680             output_conv = s_oconv;
1681             continue;
1682         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1683             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1684             input_f = LATIN1_INPUT;
1685             continue;
1686         case 'i':           /* Kanji IN ESC-$-@/B */
1687             if (*cp=='@'||*cp=='B')
1688                 kanji_intro = *cp++;
1689             continue;
1690         case 'o':           /* ASCII IN ESC-(-J/B */
1691             if (*cp=='J'||*cp=='B'||*cp=='H')
1692                 ascii_intro = *cp++;
1693             continue;
1694         case 'h':
1695             /*
1696                 bit:1   katakana->hiragana
1697                 bit:2   hiragana->katakana
1698             */
1699             if ('9'>= *cp && *cp>='0')
1700                 hira_f |= (*cp++ -'0');
1701             else
1702                 hira_f |= 1;
1703             continue;
1704         case 'r':
1705             rot_f = TRUE;
1706             continue;
1707 #if defined(MSDOS) || defined(__OS2__)
1708         case 'T':
1709             binmode_f = FALSE;
1710             continue;
1711 #endif
1712 #ifndef PERL_XS
1713         case 'V':
1714             version();
1715             exit(1);
1716             break;
1717         case 'v':
1718             usage();
1719             exit(1);
1720             break;
1721 #endif
1722 #ifdef UTF8_OUTPUT_ENABLE
1723         case 'w':           /* UTF-8 output */
1724             if (cp[0] == '8') {
1725                 output_conv = w_oconv; cp++;
1726                 if (cp[0] == '0'){
1727                     cp++;
1728                 } else {
1729                     output_bom_f = TRUE;
1730                 }
1731             } else {
1732                 if ('1'== cp[0] && '6'==cp[1]) {
1733                     output_conv = w_oconv16; cp+=2;
1734                 } else if ('3'== cp[0] && '2'==cp[1]) {
1735                     output_conv = w_oconv32; cp+=2;
1736                 } else {
1737                     output_conv = w_oconv;
1738                     continue;
1739                 }
1740                 if (cp[0]=='L') {
1741                     cp++;
1742                     output_endian = ENDIAN_LITTLE;
1743                 } else if (cp[0] == 'B') {
1744                     cp++;
1745                 } else {
1746                     continue;
1747                 }
1748                 if (cp[0] == '0'){
1749                     cp++;
1750                 } else {
1751                     output_bom_f = TRUE;
1752                 }
1753             }
1754             continue;
1755 #endif
1756 #ifdef UTF8_INPUT_ENABLE
1757         case 'W':           /* UTF input */
1758             if (cp[0] == '8') {
1759                 cp++;
1760                 input_f = UTF8_INPUT;
1761             }else{
1762                 if ('1'== cp[0] && '6'==cp[1]) {
1763                     cp += 2;
1764                     input_f = UTF16_INPUT;
1765                     input_endian = ENDIAN_BIG;
1766                 } else if ('3'== cp[0] && '2'==cp[1]) {
1767                     cp += 2;
1768                     input_f = UTF32_INPUT;
1769                     input_endian = ENDIAN_BIG;
1770                 } else {
1771                     input_f = UTF8_INPUT;
1772                     continue;
1773                 }
1774                 if (cp[0]=='L') {
1775                     cp++;
1776                     input_endian = ENDIAN_LITTLE;
1777                 } else if (cp[0] == 'B') {
1778                     cp++;
1779                 }
1780             }
1781             continue;
1782 #endif
1783         /* Input code assumption */
1784         case 'J':   /* JIS input */
1785             input_f = JIS_INPUT;
1786             continue;
1787         case 'E':   /* AT&T EUC input */
1788             input_f = EUC_INPUT;
1789             continue;
1790         case 'S':   /* MS Kanji input */
1791             input_f = SJIS_INPUT;
1792             if (x0201_f==NO_X0201) x0201_f=TRUE;
1793             continue;
1794         case 'Z':   /* Convert X0208 alphabet to asii */
1795             /* alpha_f
1796                bit:0   Convert JIS X 0208 Alphabet to ASCII
1797                bit:1   Convert Kankaku to one space
1798                bit:2   Convert Kankaku to two spaces
1799                bit:3   Convert HTML Entity
1800                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1801             */
1802             while ('0'<= *cp && *cp <='9') {
1803                 alpha_f |= 1 << (*cp++ - '0');
1804             }
1805             if (!alpha_f) alpha_f = 1;
1806             continue;
1807         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1808             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1809             /* accept  X0201
1810                     ESC-(-I     in JIS, EUC, MS Kanji
1811                     SI/SO       in JIS, EUC, MS Kanji
1812                     SSO         in EUC, JIS, not in MS Kanji
1813                     MS Kanji (0xa0-0xdf)
1814                output  X0201
1815                     ESC-(-I     in JIS (0x20-0x5f)
1816                     SSO         in EUC (0xa0-0xdf)
1817                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1818             */
1819             continue;
1820         case 'X':   /* Assume X0201 kana */
1821             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1822             x0201_f = TRUE;
1823             continue;
1824         case 'F':   /* prserve new lines */
1825             fold_preserve_f = TRUE;
1826         case 'f':   /* folding -f60 or -f */
1827             fold_f = TRUE;
1828             fold_len = 0;
1829             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1830                 fold_len *= 10;
1831                 fold_len += *cp++ - '0';
1832             }
1833             if (!(0<fold_len && fold_len<BUFSIZ))
1834                 fold_len = DEFAULT_FOLD;
1835             if (*cp=='-') {
1836                 fold_margin = 0;
1837                 cp++;
1838                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1839                     fold_margin *= 10;
1840                     fold_margin += *cp++ - '0';
1841                 }
1842             }
1843             continue;
1844         case 'm':   /* MIME support */
1845             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1846             if (*cp=='B'||*cp=='Q') {
1847                 mime_decode_mode = *cp++;
1848                 mimebuf_f = FIXED_MIME;
1849             } else if (*cp=='N') {
1850                 mime_f = TRUE; cp++;
1851             } else if (*cp=='S') {
1852                 mime_f = STRICT_MIME; cp++;
1853             } else if (*cp=='0') {
1854                 mime_decode_f = FALSE;
1855                 mime_f = FALSE; cp++;
1856             }
1857             continue;
1858         case 'M':   /* MIME output */
1859             if (*cp=='B') {
1860                 mimeout_mode = 'B';
1861                 mimeout_f = FIXED_MIME; cp++;
1862             } else if (*cp=='Q') {
1863                 mimeout_mode = 'Q';
1864                 mimeout_f = FIXED_MIME; cp++;
1865             } else {
1866                 mimeout_f = TRUE;
1867             }
1868             continue;
1869         case 'B':   /* Broken JIS support */
1870             /*  bit:0   no ESC JIS
1871                 bit:1   allow any x on ESC-(-x or ESC-$-x
1872                 bit:2   reset to ascii on NL
1873             */
1874             if ('9'>= *cp && *cp>='0')
1875                 broken_f |= 1<<(*cp++ -'0');
1876             else
1877                 broken_f |= TRUE;
1878             continue;
1879 #ifndef PERL_XS
1880         case 'O':/* for Output file */
1881             file_out_f = TRUE;
1882             continue;
1883 #endif
1884         case 'c':/* add cr code */
1885             nlmode_f = CRLF;
1886             continue;
1887         case 'd':/* delete cr code */
1888             nlmode_f = LF;
1889             continue;
1890         case 'I':   /* ISO-2022-JP output */
1891             iso2022jp_f = TRUE;
1892             continue;
1893         case 'L':  /* line mode */
1894             if (*cp=='u') {         /* unix */
1895                 nlmode_f = LF; cp++;
1896             } else if (*cp=='m') { /* mac */
1897                 nlmode_f = CR; cp++;
1898             } else if (*cp=='w') { /* windows */
1899                 nlmode_f = CRLF; cp++;
1900             } else if (*cp=='0') { /* no conversion  */
1901                 nlmode_f = 0; cp++;
1902             }
1903             continue;
1904 #ifndef PERL_XS
1905         case 'g':
1906             if (*cp == '1') {
1907                 guess_f = 2;
1908                 cp++;
1909             } else if (*cp == '0') {
1910                 guess_f = 1;
1911                 cp++;
1912             } else {
1913                 guess_f = 1;
1914             }
1915             continue;
1916 #endif
1917         case SP:
1918         /* module muliple options in a string are allowed for Perl moudle  */
1919             while(*cp && *cp++!='-');
1920             continue;
1921         default:
1922             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1923             /* bogus option but ignored */
1924             continue;
1925         }
1926     }
1927 }
1928
1929 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1930 {
1931     if (iconv_func){
1932         struct input_code *p = input_code_list;
1933         while (p->name){
1934             if (iconv_func == p->iconv_func){
1935                 return p;
1936             }
1937             p++;
1938         }
1939     }
1940     return 0;
1941 }
1942
1943 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1944 {
1945 #ifdef INPUT_CODE_FIX
1946     if (f || !input_f)
1947 #endif
1948         if (estab_f != f){
1949             estab_f = f;
1950         }
1951
1952     if (iconv_func
1953 #ifdef INPUT_CODE_FIX
1954         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1955 #endif
1956         ){
1957         iconv = iconv_func;
1958     }
1959 #ifdef CHECK_OPTION
1960     if (estab_f && iconv_for_check != iconv){
1961         struct input_code *p = find_inputcode_byfunc(iconv);
1962         if (p){
1963             set_input_codename(p->name);
1964             debug(p->name);
1965         }
1966         iconv_for_check = iconv;
1967     }
1968 #endif
1969 }
1970
/*
 * Bit flags that set_code_score() ORs into input_code.score.
 * Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* character does not exist */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* score assigned by status_reset() */
#define SCORE_INIT (SCORE_iMIME)

/*
 * The tables hold SCORE_* flags up to SCORE_ERROR (1 << 7 == 128), which
 * does not fit in a signed char; the element type is therefore unsigned so
 * that the flag is read back as a single positive bit.
 */

/* Indexed by (c2 & 0x0f) when (c2 & 0x70) == 0x20; see code_score(). */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/* Indexed by (c2 & 0x0f) when (c2 & 0x70) == 0x70; see code_score(). */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1995
1996 void set_code_score(struct input_code *ptr, nkf_char score)
1997 {
1998     if (ptr){
1999         ptr->score |= score;
2000     }
2001 }
2002
2003 void clr_code_score(struct input_code *ptr, nkf_char score)
2004 {
2005     if (ptr){
2006         ptr->score &= ~score;
2007     }
2008 }
2009
2010 void code_score(struct input_code *ptr)
2011 {
2012     nkf_char c2 = ptr->buf[0];
2013 #ifdef UTF8_OUTPUT_ENABLE
2014     nkf_char c1 = ptr->buf[1];
2015 #endif
2016     if (c2 < 0){
2017         set_code_score(ptr, SCORE_ERROR);
2018     }else if (c2 == SSO){
2019         set_code_score(ptr, SCORE_KANA);
2020     }else if (c2 == 0x8f){
2021         set_code_score(ptr, SCORE_X0212);
2022 #ifdef UTF8_OUTPUT_ENABLE
2023     }else if (!e2w_conv(c2, c1)){
2024         set_code_score(ptr, SCORE_NO_EXIST);
2025 #endif
2026     }else if ((c2 & 0x70) == 0x20){
2027         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2028     }else if ((c2 & 0x70) == 0x70){
2029         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2030     }else if ((c2 & 0x70) >= 0x50){
2031         set_code_score(ptr, SCORE_L2);
2032     }
2033 }
2034
2035 void status_disable(struct input_code *ptr)
2036 {
2037     ptr->stat = -1;
2038     ptr->buf[0] = -1;
2039     code_score(ptr);
2040     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2041 }
2042
2043 void status_push_ch(struct input_code *ptr, nkf_char c)
2044 {
2045     ptr->buf[ptr->index++] = c;
2046 }
2047
2048 void status_clear(struct input_code *ptr)
2049 {
2050     ptr->stat = 0;
2051     ptr->index = 0;
2052 }
2053
2054 void status_reset(struct input_code *ptr)
2055 {
2056     status_clear(ptr);
2057     ptr->score = SCORE_INIT;
2058 }
2059
2060 void status_reinit(struct input_code *ptr)
2061 {
2062     status_reset(ptr);
2063     ptr->_file_stat = 0;
2064 }
2065
2066 void status_check(struct input_code *ptr, nkf_char c)
2067 {
2068     if (c <= DEL && estab_f){
2069         status_reset(ptr);
2070     }
2071 }
2072
2073 void s_status(struct input_code *ptr, nkf_char c)
2074 {
2075     switch(ptr->stat){
2076       case -1:
2077           status_check(ptr, c);
2078           break;
2079       case 0:
2080           if (c <= DEL){
2081               break;
2082 #ifdef NUMCHAR_OPTION
2083           }else if (is_unicode_capsule(c)){
2084               break;
2085 #endif
2086           }else if (0xa1 <= c && c <= 0xdf){
2087               status_push_ch(ptr, SSO);
2088               status_push_ch(ptr, c);
2089               code_score(ptr);
2090               status_clear(ptr);
2091           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2092               ptr->stat = 1;
2093               status_push_ch(ptr, c);
2094           }else if (0xed <= c && c <= 0xee){
2095               ptr->stat = 3;
2096               status_push_ch(ptr, c);
2097 #ifdef SHIFTJIS_CP932
2098           }else if (is_ibmext_in_sjis(c)){
2099               ptr->stat = 2;
2100               status_push_ch(ptr, c);
2101 #endif /* SHIFTJIS_CP932 */
2102 #ifdef X0212_ENABLE
2103           }else if (0xf0 <= c && c <= 0xfc){
2104               ptr->stat = 1;
2105               status_push_ch(ptr, c);
2106 #endif /* X0212_ENABLE */
2107           }else{
2108               status_disable(ptr);
2109           }
2110           break;
2111       case 1:
2112           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2113               status_push_ch(ptr, c);
2114               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2115               code_score(ptr);
2116               status_clear(ptr);
2117           }else{
2118               status_disable(ptr);
2119           }
2120           break;
2121       case 2:
2122 #ifdef SHIFTJIS_CP932
2123         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2124             status_push_ch(ptr, c);
2125             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2126                 set_code_score(ptr, SCORE_CP932);
2127                 status_clear(ptr);
2128                 break;
2129             }
2130         }
2131 #endif /* SHIFTJIS_CP932 */
2132         status_disable(ptr);
2133           break;
2134       case 3:
2135           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2136               status_push_ch(ptr, c);
2137               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2138             set_code_score(ptr, SCORE_CP932);
2139             status_clear(ptr);
2140           }else{
2141               status_disable(ptr);
2142           }
2143           break;
2144     }
2145 }
2146
2147 void e_status(struct input_code *ptr, nkf_char c)
2148 {
2149     switch (ptr->stat){
2150       case -1:
2151           status_check(ptr, c);
2152           break;
2153       case 0:
2154           if (c <= DEL){
2155               break;
2156 #ifdef NUMCHAR_OPTION
2157           }else if (is_unicode_capsule(c)){
2158               break;
2159 #endif
2160           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2161               ptr->stat = 1;
2162               status_push_ch(ptr, c);
2163 #ifdef X0212_ENABLE
2164           }else if (0x8f == c){
2165               ptr->stat = 2;
2166               status_push_ch(ptr, c);
2167 #endif /* X0212_ENABLE */
2168           }else{
2169               status_disable(ptr);
2170           }
2171           break;
2172       case 1:
2173           if (0xa1 <= c && c <= 0xfe){
2174               status_push_ch(ptr, c);
2175               code_score(ptr);
2176               status_clear(ptr);
2177           }else{
2178               status_disable(ptr);
2179           }
2180           break;
2181 #ifdef X0212_ENABLE
2182       case 2:
2183           if (0xa1 <= c && c <= 0xfe){
2184               ptr->stat = 1;
2185               status_push_ch(ptr, c);
2186           }else{
2187               status_disable(ptr);
2188           }
2189 #endif /* X0212_ENABLE */
2190     }
2191 }
2192
2193 #ifdef UTF8_INPUT_ENABLE
2194 void w_status(struct input_code *ptr, nkf_char c)
2195 {
2196     switch (ptr->stat){
2197       case -1:
2198           status_check(ptr, c);
2199           break;
2200       case 0:
2201           if (c <= DEL){
2202               break;
2203 #ifdef NUMCHAR_OPTION
2204           }else if (is_unicode_capsule(c)){
2205               break;
2206 #endif
2207           }else if (0xc0 <= c && c <= 0xdf){
2208               ptr->stat = 1;
2209               status_push_ch(ptr, c);
2210           }else if (0xe0 <= c && c <= 0xef){
2211               ptr->stat = 2;
2212               status_push_ch(ptr, c);
2213           }else if (0xf0 <= c && c <= 0xf4){
2214               ptr->stat = 3;
2215               status_push_ch(ptr, c);
2216           }else{
2217               status_disable(ptr);
2218           }
2219           break;
2220       case 1:
2221       case 2:
2222           if (0x80 <= c && c <= 0xbf){
2223               status_push_ch(ptr, c);
2224               if (ptr->index > ptr->stat){
2225                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2226                              && ptr->buf[2] == 0xbf);
2227                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2228                            &ptr->buf[0], &ptr->buf[1]);
2229                   if (!bom){
2230                       code_score(ptr);
2231                   }
2232                   status_clear(ptr);
2233               }
2234           }else{
2235               status_disable(ptr);
2236           }
2237           break;
2238       case 3:
2239         if (0x80 <= c && c <= 0xbf){
2240             if (ptr->index < ptr->stat){
2241                 status_push_ch(ptr, c);
2242             } else {
2243                 status_clear(ptr);
2244             }
2245           }else{
2246               status_disable(ptr);
2247           }
2248           break;
2249     }
2250 }
2251 #endif
2252
2253 void code_status(nkf_char c)
2254 {
2255     int action_flag = 1;
2256     struct input_code *result = 0;
2257     struct input_code *p = input_code_list;
2258     while (p->name){
2259         if (!p->status_func) {
2260             ++p;
2261             continue;
2262         }
2263         if (!p->status_func)
2264             continue;
2265         (p->status_func)(p, c);
2266         if (p->stat > 0){
2267             action_flag = 0;
2268         }else if(p->stat == 0){
2269             if (result){
2270                 action_flag = 0;
2271             }else{
2272                 result = p;
2273             }
2274         }
2275         ++p;
2276     }
2277
2278     if (action_flag){
2279         if (result && !estab_f){
2280             set_iconv(TRUE, result->iconv_func);
2281         }else if (c <= DEL){
2282             struct input_code *ptr = input_code_list;
2283             while (ptr->name){
2284                 status_reset(ptr);
2285                 ++ptr;
2286             }
2287         }
2288     }
2289 }
2290
2291 #ifndef WIN32DLL
2292 nkf_char std_getc(FILE *f)
2293 {
2294     if (std_gc_ndx){
2295         return std_gc_buf[--std_gc_ndx];
2296     }
2297     return getc(f);
2298 }
2299 #endif /*WIN32DLL*/
2300
2301 nkf_char std_ungetc(nkf_char c, FILE *f)
2302 {
2303     if (std_gc_ndx == STD_GC_BUFSIZE){
2304         return EOF;
2305     }
2306     std_gc_buf[std_gc_ndx++] = c;
2307     return c;
2308 }
2309
2310 #ifndef WIN32DLL
2311 void std_putc(nkf_char c)
2312 {
2313     if(c!=EOF)
2314       putchar(c);
2315 }
2316 #endif /*WIN32DLL*/
2317
2318 #if !defined(PERL_XS) && !defined(WIN32DLL)
2319 nkf_char noconvert(FILE *f)
2320 {
2321     nkf_char    c;
2322
2323     if (nop_f == 2)
2324         module_connection();
2325     while ((c = (*i_getc)(f)) != EOF)
2326       (*o_putc)(c);
2327     (*o_putc)(EOF);
2328     return 1;
2329 }
2330 #endif
2331
/*
 * Build the output and input filter chains from the current option flags.
 *
 * Output side: oconv starts as output_conv and o_putc as std_putc.  Each
 * enabled stage saves the current oconv (or o_putc) in its own o_* pointer
 * and installs itself in its place, so the order of the tests below
 * determines the nesting of the stages.
 *
 * Input side: i_getc / i_ungetc start as std_getc / std_ungetc and are
 * wrapped the same way by each enabled input stage.
 *
 * Finally the input converter is chosen from input_f (forced with -TRUE
 * when an input encoding is given, otherwise e_iconv with FALSE) and every
 * entry of input_code_list is reinitialised.
 */
void module_connection(void)
{
    oconv = output_conv;
    o_putc = std_putc;

    /* replace continuation module, from output side */

    /* output redirection */
#ifdef CHECK_OPTION
    if (noout_f || guess_f){
        o_putc = no_putc;
    }
#endif
    if (mimeout_f) {
        o_mputc = o_putc;
        o_putc = mime_putc;
        if (mimeout_f == TRUE) {
            o_base64conv = oconv; oconv = base64_conv;
        }
        /* base64_count = 0; */
    }

    if (nlmode_f || guess_f) {
        o_nlconv = oconv; oconv = nl_conv;
    }
    if (rot_f) {
        o_rot_conv = oconv; oconv = rot_conv;
    }
    if (iso2022jp_f) {
        o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
    }
    if (hira_f) {
        o_hira_conv = oconv; oconv = hira_conv;
    }
    if (fold_f) {
        o_fconv = oconv; oconv = fold_conv;
        f_line = 0;
    }
    if (alpha_f || x0201_f) {
        o_zconv = oconv; oconv = z_conv;
    }

    i_getc = std_getc;
    i_ungetc = std_ungetc;
    /* input redirection */
#ifdef INPUT_OPTION
    if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
        i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
    }
    if (url_f){
        i_ugetc = i_getc; i_getc = url_getc;
        i_uungetc = i_ungetc; i_ungetc= url_ungetc;
    }
#endif
#ifdef NUMCHAR_OPTION
    if (numchar_f){
        i_ngetc = i_getc; i_getc = numchar_getc;
        i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
    }
#endif
#ifdef UNICODE_NORMALIZATION
    if (nfc_f && input_f == UTF8_INPUT){
        i_nfc_getc = i_getc; i_getc = nfc_getc;
        i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
    }
#endif
    if (mime_f && mimebuf_f==FIXED_MIME) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
    }
    if (broken_f & 1) {
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
    }
    /* an explicitly requested input encoding forces its converter (-TRUE) */
    if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
        set_iconv(-TRUE, e_iconv);
    } else if (input_f == SJIS_INPUT) {
        set_iconv(-TRUE, s_iconv);
#ifdef UTF8_INPUT_ENABLE
    } else if (input_f == UTF8_INPUT) {
        set_iconv(-TRUE, w_iconv);
    } else if (input_f == UTF16_INPUT) {
        set_iconv(-TRUE, w_iconv16);
    } else if (input_f == UTF32_INPUT) {
        set_iconv(-TRUE, w_iconv32);
#endif
    } else {
        /* no input encoding given: start from e_iconv, not established */
        set_iconv(FALSE, e_iconv);
    }

    /* reinitialise the state of every detection candidate */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
}
2430
/*
 * Check and Ignore BOM
 *
 * Reads up to four bytes from f through i_getc and looks for a byte order
 * mark.  The recognised patterns and their effect are:
 *
 *   00 00 FE FF   w_iconv32, input_endian = ENDIAN_BIG
 *   00 00 FF FE   w_iconv32, input_endian = ENDIAN_2143
 *   EF BB BF      w_iconv
 *   FE FF 00 00   w_iconv32, input_endian = ENDIAN_3412
 *   FE FF         w_iconv16, input_endian = ENDIAN_BIG
 *   FF FE 00 00   w_iconv32, input_endian = ENDIAN_LITTLE
 *   FF FE         w_iconv16, input_endian = ENDIAN_LITTLE
 *
 * When input_f is zero the matching converter is installed with
 * set_iconv(TRUE, ...).  The BOM is consumed (the function returns without
 * pushing it back) only if iconv then equals the matching converter.
 * In every other case the bytes read are handed back through i_ungetc,
 * last-read byte first.
 */
void check_bom(FILE *f)
{
    int c2;
    switch(c2 = (*i_getc)(f)){
    case 0x00:
        if((c2 = (*i_getc)(f)) == 0x00){
            if((c2 = (*i_getc)(f)) == 0xFE){
                if((c2 = (*i_getc)(f)) == 0xFF){
                    /* 00 00 FE FF */
                    if(!input_f){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
                        input_endian = ENDIAN_BIG;
                        return;
                    }
                    (*i_ungetc)(0xFF,f);
                }else (*i_ungetc)(c2,f);
                (*i_ungetc)(0xFE,f);
            }else if(c2 == 0xFF){
                if((c2 = (*i_getc)(f)) == 0xFE){
                    /* 00 00 FF FE */
                    if(!input_f){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
                        input_endian = ENDIAN_2143;
                        return;
                    }
                    (*i_ungetc)(0xFF,f);
                }else (*i_ungetc)(c2,f);
                (*i_ungetc)(0xFF,f);
            }else (*i_ungetc)(c2,f);
            (*i_ungetc)(0x00,f);
        }else (*i_ungetc)(c2,f);
        (*i_ungetc)(0x00,f);
        break;
    case 0xEF:
        if((c2 = (*i_getc)(f)) == 0xBB){
            if((c2 = (*i_getc)(f)) == 0xBF){
                /* EF BB BF */
                if(!input_f){
                    set_iconv(TRUE, w_iconv);
                }
                if (iconv == w_iconv) {
                    return;
                }
                (*i_ungetc)(0xBF,f);
            }else (*i_ungetc)(c2,f);
            (*i_ungetc)(0xBB,f);
        }else (*i_ungetc)(c2,f);
        (*i_ungetc)(0xEF,f);
        break;
    case 0xFE:
        if((c2 = (*i_getc)(f)) == 0xFF){
            if((c2 = (*i_getc)(f)) == 0x00){
                if((c2 = (*i_getc)(f)) == 0x00){
                    /* FE FF 00 00 */
                    if(!input_f){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
                        input_endian = ENDIAN_3412;
                        return;
                    }
                    (*i_ungetc)(0x00,f);
                }else (*i_ungetc)(c2,f);
                (*i_ungetc)(0x00,f);
            }else (*i_ungetc)(c2,f);
            /* only FE FF remains consumed here: try the 16-bit reading */
            if(!input_f){
                set_iconv(TRUE, w_iconv16);
            }
            if (iconv == w_iconv16) {
                input_endian = ENDIAN_BIG;
                return;
            }
            (*i_ungetc)(0xFF,f);
        }else (*i_ungetc)(c2,f);
        (*i_ungetc)(0xFE,f);
        break;
    case 0xFF:
        if((c2 = (*i_getc)(f)) == 0xFE){
            if((c2 = (*i_getc)(f)) == 0x00){
                if((c2 = (*i_getc)(f)) == 0x00){
                    /* FF FE 00 00 */
                    if(!input_f){
                        set_iconv(TRUE, w_iconv32);
                    }
                    if (iconv == w_iconv32) {
                        input_endian = ENDIAN_LITTLE;
                        return;
                    }
                    (*i_ungetc)(0x00,f);
                }else (*i_ungetc)(c2,f);
                (*i_ungetc)(0x00,f);
            }else (*i_ungetc)(c2,f);
            /* only FF FE remains consumed here: try the 16-bit reading */
            if(!input_f){
                set_iconv(TRUE, w_iconv16);
            }
            if (iconv == w_iconv16) {
                input_endian = ENDIAN_LITTLE;
                return;
            }
            (*i_ungetc)(0xFE,f);
        }else (*i_ungetc)(c2,f);
        (*i_ungetc)(0xFF,f);
        break;
    default:
        /* not the start of any recognised BOM: hand the byte back */
        (*i_ungetc)(c2,f);
        break;
    }
}
2541
2542 /*
2543    Conversion main loop. Code detection only.

        Reads input from f through i_getc until EOF.  Each pass of the
        loop gathers one character into c2/c1 (with c0 holding any extra
        trailing bytes); the switch after the "send:" marker then hands it
        to (*iconv) when input_mode is ASCII, or straight to (*oconv) for
        the other input modes.  After the loop, (*iconv)(EOF, 0, 0) is
        called and, if no input code name has been recorded yet and
        is_8bit is set, the name of the lowest-score entry of
        input_code_list is recorded.

        Returns 1 once the loop has ended.  The only other return (0, in
        the CP932 UDC branch) looks unreachable; see the note there.
2544  */
2545
2546 nkf_char kanji_convert(FILE *f)
2547 {
2548     nkf_char    c3, c2=0, c1, c0=0;
2549     int is_8bit = FALSE;
2550
         /* is_8bit is set when the input encoding was given explicitly as
            one of the 8-bit encodings below; it disables the SI/SO/ESC
            handling further down unless MIME decoding is active. */
2551     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2552 #ifdef UTF8_INPUT_ENABLE
2553        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2554 #endif
2555       ){
2556         is_8bit = TRUE;
2557     }
2558
2559     input_mode = ASCII;
2560     output_mode = ASCII;
2561     shift_mode = FALSE;
2562
         /* NEXT restarts the loop without emitting anything.  SEND expands
            to an empty statement, so control simply falls out of the
            if/else chain and reaches the switch at "send:".  LAST leaves
            the loop. */
2563 #define NEXT continue      /* no output, get next */
2564 #define SEND ;             /* output c1 and c2, get next */
2565 #define LAST break         /* end of loop, go closing  */
2566
2567     module_connection();
2568     check_bom(f);
2569
2570     while ((c1 = (*i_getc)(f)) != EOF) {
             /* With INPUT_CODE_FIX defined, code_status() is only fed when
                no input encoding was specified; otherwise it is always fed. */
2571 #ifdef INPUT_CODE_FIX
2572         if (!input_f)
2573 #endif
2574             code_status(c1);
2575         if (c2) {
2576             /* second byte */
2577             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2578                 /* in case of 8th bit is on */
2579                 if (!estab_f&&!mime_decode_mode) {
2580                     /* in case of not established yet */
2581                     /* It is still ambiguous */
2582                     if (h_conv(f, c2, c1)==EOF)
2583                         LAST;
2584                     else
2585                         c2 = 0;
2586                     NEXT;
2587                 } else {
2588                     /* in case of already established */
2589                     if (c1 < AT) {
2590                         /* ignore bogus code and not CP5022x UCD */
2591                         c2 = 0;
2592                         NEXT;
2593                     } else {
2594                         SEND;
2595                     }
2596                 }
2597             } else
2598                 /* second byte, 7 bit code */
2599                 /* it might be kanji shifted */
2600                 if ((c1 == DEL) || (c1 <= SP)) {
2601                     /* ignore bogus first code */
2602                     c2 = 0;
2603                     NEXT;
2604                 } else
2605                     SEND;
2606         } else {
2607             /* first byte */
2608 #ifdef UTF8_INPUT_ENABLE
2609             if (iconv == w_iconv16) {
                     /* UTF-16: read the second byte of the code unit so that
                        c2 holds the high byte and c1 the low byte.  When the
                        high byte is 0xD8..0xDB (a leading surrogate), two
                        more bytes are read and packed into c0 as
                        (high << 8) | low.  A short read sets c2 to EOF. */
2610                 if (input_endian == ENDIAN_BIG) {
2611                     c2 = c1;
2612                     if ((c1 = (*i_getc)(f)) != EOF) {
2613                         if (0xD8 <= c2 && c2 <= 0xDB) {
2614                             if ((c0 = (*i_getc)(f)) != EOF) {
2615                                 c0 <<= 8;
2616                                 if ((c3 = (*i_getc)(f)) != EOF) {
2617                                     c0 |= c3;
2618                                 } else c2 = EOF;
2619                             } else c2 = EOF;
2620                         }
2621                     } else c2 = EOF;
2622                 } else {
2623                     if ((c2 = (*i_getc)(f)) != EOF) {
2624                         if (0xD8 <= c2 && c2 <= 0xDB) {
2625                             if ((c3 = (*i_getc)(f)) != EOF) {
2626                                 if ((c0 = (*i_getc)(f)) != EOF) {
2627                                     c0 <<= 8;
2628                                     c0 |= c3;
2629                                 } else c2 = EOF;
2630                             } else c2 = EOF;
2631                         }
2632                     } else c2 = EOF;
2633                 }
2634                 SEND;
2635             } else if(iconv == w_iconv32){
                     /* UTF-32: the four bytes arrive as c3 (first), c2, c1,
                        c0 (last) and are combined into c1 according to
                        input_endian; only the low 24 bits are assembled.
                        Note that this local c3 shadows the function-level
                        c3. */
2636                 int c3 = c1;
2637                 if((c2 = (*i_getc)(f)) != EOF &&
2638                    (c1 = (*i_getc)(f)) != EOF &&
2639                    (c0 = (*i_getc)(f)) != EOF){
2640                     switch(input_endian){
2641                     case ENDIAN_BIG:
2642                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2643                         break;
2644                     case ENDIAN_LITTLE:
2645                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2646                         break;
2647                     case ENDIAN_2143:
2648                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2649                         break;
2650                     case ENDIAN_3412:
2651                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2652                         break;
2653                     }
2654                     c2 = 0;
2655                 }else{
2656                     c2 = EOF;
2657                 }
2658                 SEND;
2659             } else
2660 #endif
2661 #ifdef NUMCHAR_OPTION
2662             if (is_unicode_capsule(c1)){
2663                 SEND;
2664             } else
2665 #endif
2666             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2667                 /* 8 bit code */
2668                 if (!estab_f && !iso8859_f) {
2669                     /* not established yet */
2670                     c2 = c1;
2671                     NEXT;
2672                 } else { /* estab_f==TRUE */
2673                     if (iso8859_f) {
2674                         c2 = ISO8859_1;
2675                         c1 &= 0x7f;
2676                         SEND;
2677                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2678                         /* SJIS X0201 Case... */
2679                         if(iso2022jp_f && x0201_f==NO_X0201) {
2680                             (*oconv)(GETA1, GETA2);
2681                             NEXT;
2682                         } else {
2683                             c2 = X0201;
2684                             c1 &= 0x7f;
2685                             SEND;
2686                         }
2687                     } else if (c1==SSO && iconv != s_iconv) {
2688                         /* EUC X0201 Case */
2689                         c1 = (*i_getc)(f);  /* skip SSO */
2690                         code_status(c1);
2691                         if (SSP<=c1 && c1<0xe0) {
2692                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2693                                 (*oconv)(GETA1, GETA2);
2694                                 NEXT;
2695                             } else {
2696                                 c2 = X0201;
2697                                 c1 &= 0x7f;
2698                                 SEND;
2699                             }
2700                         } else  { /* bogus code, skip SSO and one byte */
2701                             NEXT;
2702                         }
2703                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2704                                (c1 == 0xFD || c1 == 0xFE)) {
2705                         /* CP10001 */
2706                         c2 = X0201;
2707                         c1 &= 0x7f;
2708                         SEND;
2709                     } else {
2710                        /* already established */
2711                        c2 = c1;
2712                        NEXT;
2713                     }
2714                 }
2715             } else if ((c1 > SP) && (c1 != DEL)) {
2716                 /* in case of Roman characters */
2717                 if (shift_mode) {
2718                     /* output 1 shifted byte */
2719                     if (iso8859_f) {
2720                         c2 = ISO8859_1;
2721                         SEND;
2722                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2723                       /* output 1 shifted byte */
2724                         if(iso2022jp_f && x0201_f==NO_X0201) {
2725                             (*oconv)(GETA1, GETA2);
2726                             NEXT;
2727                         } else {
2728                             c2 = X0201;
2729                             SEND;
2730                         }
2731                     } else {
2732                         /* look like bogus code */
2733                         NEXT;
2734                     }
2735                 } else if (input_mode == X0208 || input_mode == X0212 ||
2736                            input_mode == X0213_1 || input_mode == X0213_2) {
2737                     /* in case of Kanji shifted */
2738                     c2 = c1;
2739                     NEXT;
2740                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2741                     /* Check MIME code */
2742                     if ((c1 = (*i_getc)(f)) == EOF) {
2743                         (*oconv)(0, '=');
2744                         LAST;
2745                     } else if (c1 == '?') {
2746                         /* =? is mime conversion start sequence */
2747                         if(mime_f == STRICT_MIME) {
2748                             /* check in real detail */
2749                             if (mime_begin_strict(f) == EOF)
2750                                 LAST;
2751                             else
2752                                 NEXT;
2753                         } else if (mime_begin(f) == EOF)
2754                             LAST;
2755                         else
2756                             NEXT;
2757                     } else {
                             /* not a MIME start: emit the '=' and re-read
                                the following character normally */
2758                         (*oconv)(0, '=');
2759                         (*i_ungetc)(c1,f);
2760                         NEXT;
2761                     }
2762                 } else {
2763                     /* normal ASCII code */
2764                     SEND;
2765                 }
2766             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2767                 shift_mode = FALSE;
2768                 NEXT;
2769             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2770                 shift_mode = TRUE;
2771                 NEXT;
2772             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* escape sequence: the following bytes select
                        input_mode / shift_mode */
2773                 if ((c1 = (*i_getc)(f)) == EOF) {
2774                     /*  (*oconv)(0, ESC); don't send bogus code */
2775                     LAST;
2776                 } else if (c1 == '$') {
2777                     if ((c1 = (*i_getc)(f)) == EOF) {
2778                         /*
2779                         (*oconv)(0, ESC); don't send bogus code
2780                         (*oconv)(0, '$'); */
2781                         LAST;
2782                     } else if (c1 == '@'|| c1 == 'B') {
2783                         /* This is kanji introduction */
2784                         input_mode = X0208;
2785                         shift_mode = FALSE;
2786                         set_input_codename("ISO-2022-JP");
2787 #ifdef CHECK_OPTION
2788                         debug("ISO-2022-JP");
2789 #endif
2790                         NEXT;
2791                     } else if (c1 == '(') {
2792                         if ((c1 = (*i_getc)(f)) == EOF) {
2793                             /* don't send bogus code
2794                             (*oconv)(0, ESC);
2795                             (*oconv)(0, '$');
2796                             (*oconv)(0, '(');
2797                                 */
2798                             LAST;
2799                         } else if (c1 == '@'|| c1 == 'B') {
2800                             /* This is kanji introduction */
2801                             input_mode = X0208;
2802                             shift_mode = FALSE;
2803                             NEXT;
2804 #ifdef X0212_ENABLE
2805                         } else if (c1 == 'D'){
2806                             input_mode = X0212;
2807                             shift_mode = FALSE;
2808                             NEXT;
2809 #endif /* X0212_ENABLE */
2810                         } else if (c1 == (X0213_1&0x7F)){
2811                             input_mode = X0213_1;
2812                             shift_mode = FALSE;
2813                             NEXT;
2814                         } else if (c1 == (X0213_2&0x7F)){
2815                             input_mode = X0213_2;
2816                             shift_mode = FALSE;
2817                             NEXT;
2818                         } else {
2819                             /* could be some special code */
2820                             (*oconv)(0, ESC);
2821                             (*oconv)(0, '$');
2822                             (*oconv)(0, '(');
2823                             (*oconv)(0, c1);
2824                             NEXT;
2825                         }
2826                     } else if (broken_f&0x2) {
2827                         /* accept any ESC-$-x as broken code ... */
2828                         input_mode = X0208;
2829                         shift_mode = FALSE;
2830                         NEXT;
2831                     } else {
2832                         (*oconv)(0, ESC);
2833                         (*oconv)(0, '$');
2834                         (*oconv)(0, c1);
2835                         NEXT;
2836                     }
2837                 } else if (c1 == '(') {
2838                     if ((c1 = (*i_getc)(f)) == EOF) {
2839                         /* don't send bogus code
2840                         (*oconv)(0, ESC);
2841                         (*oconv)(0, '('); */
2842                         LAST;
2843                     } else {
2844                         if (c1 == 'I') {
2845                             /* This is X0201 kana introduction */
2846                             input_mode = X0201; shift_mode = X0201;
2847                             NEXT;
2848                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2849                             /* ESC ( B / J / H: switch back to ASCII input mode */
2850                             input_mode = ASCII; shift_mode = FALSE;
2851                             NEXT;
2852                         } else if (broken_f&0x2) {
                                 /* accept any other ESC-(-x as a return to ASCII */
2853                             input_mode = ASCII; shift_mode = FALSE;
2854                             NEXT;
2855                         } else {
2856                             (*oconv)(0, ESC);
2857                             (*oconv)(0, '(');
2858                             /* maintain various input_mode here */
2859                             SEND;
2860                         }
2861                     }
2862                } else if ( c1 == 'N' || c1 == 'n'){
2863                    /* SS2 */
2864                    c3 = (*i_getc)(f);  /* skip SS2 */
2865                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2866                        c1 = c3;
2867                        c2 = X0201;
2868                        SEND;
2869                    }else{
                            /* not a kana byte: push it back (c3 may be EOF
                               here) and treat the ESC as a lonely one */
2870                        (*i_ungetc)(c3, f);
2871                        /* lonely ESC  */
2872                        (*oconv)(0, ESC);
2873                        SEND;
2874                    }
2875                 } else {
2876                     /* lonely ESC  */
2877                     (*oconv)(0, ESC);
2878                     SEND;
2879                 }
2880             } else if (c1 == ESC && iconv == s_iconv) {
2881                 /* ESC in Shift_JIS */
2882                 if ((c1 = (*i_getc)(f)) == EOF) {
2883                     /*  (*oconv)(0, ESC); don't send bogus code */
2884                     LAST;
2885                 } else if (c1 == '$') {
2886                     /* J-PHONE emoji */
2887                     if ((c1 = (*i_getc)(f)) == EOF) {
2888                         /*
2889                            (*oconv)(0, ESC); don't send bogus code
2890                            (*oconv)(0, '$'); */
2891                         LAST;
2892                     } else {
2893                         if (('E' <= c1 && c1 <= 'G') ||
2894                             ('O' <= c1 && c1 <= 'Q')) {
2895                             /*
2896                                NUM : 0 1 2 3 4 5
2897                                BYTE: G E F O P Q
2898                                C%7 : 1 6 0 2 3 4
2899                                C%7 : 0 1 2 3 4 5 6
2900                                NUM : 2 0 3 4 5 X 1
2901                              */
2902                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                                 /* c0 becomes the offset added to each
                                    following byte; bytes are emitted until
                                    one falls outside SP..'z' or EOF is hit */
2903                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2904                             while ((c1 = (*i_getc)(f)) != EOF) {
2905                                 if (SP <= c1 && c1 <= 'z') {
2906                                     (*oconv)(0, c1 + c0);
2907                                 } else break; /* c1 == SO */
2908                             }
2909                         }
2910                     }
2911                     if (c1 == EOF) LAST;
2912                     NEXT;
2913                 } else {
2914                     /* lonely ESC  */
2915                     (*oconv)(0, ESC);
2916                     SEND;
2917                 }
2918             } else if (c1 == LF || c1 == CR) {
2919                 if (broken_f&4) {
2920                     input_mode = ASCII; set_iconv(FALSE, 0);
2921                     SEND;
2922                 } else if (mime_decode_f && !mime_decode_mode){
                         /* While MIME decoding is enabled, a line break that
                            is followed by SP is dropped (only the SP is
                            pushed back); otherwise the bytes that were read
                            ahead are pushed back and the line break itself
                            is sent. */
2923                     if (c1 == LF) {
2924                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2925                             i_ungetc(SP,f);
2926                             continue;
2927                         } else {
2928                             i_ungetc(c1,f);
2929                         }
2930                         c1 = LF;
2931                         SEND;
2932                     } else  { /* if (c1 == CR)*/
2933                         if ((c1=(*i_getc)(f))!=EOF) {
2934                             if (c1==SP) {
2935                                 i_ungetc(SP,f);
2936                                 continue;
2937                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2938                                 i_ungetc(SP,f);
2939                                 continue;
2940                             } else {
2941                                 i_ungetc(c1,f);
2942                             }
                                 /* NOTE(review): this LF is pushed back even
                                    when the byte after CR was not LF; confirm
                                    that is intended. */
2943                             i_ungetc(LF,f);
2944                         } else {
2945                             i_ungetc(c1,f);
2946                         }
2947                         c1 = CR;
2948                         SEND;
2949                     }
2950                 }
2951             } else if (c1 == DEL && input_mode == X0208) {
2952                 /* CP5022x */
2953                 c2 = c1;
2954                 NEXT;
2955             } else
2956                 SEND;
2957         }
2958         /* send: */
2959         switch(input_mode){
2960         case ASCII:
                 /* A negative return from (*iconv) asks for more trailing
                    bytes: they are read here and the call is repeated with
                    c0 filled in. */
2961             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2962             case -2:
2963                 /* 4 bytes UTF-8 */
2964                 if ((c0 = (*i_getc)(f)) != EOF) {
2965                     code_status(c0);
2966                     c0 <<= 8;
2967                     if ((c3 = (*i_getc)(f)) != EOF) {
2968                         code_status(c3);
2969                         (*iconv)(c2, c1, c0|c3);
2970                     }
2971                 }
2972                 break;
2973             case -1:
2974                 /* 3 bytes EUC or UTF-8 */
2975                 if ((c0 = (*i_getc)(f)) != EOF) {
2976                     code_status(c0);
2977                     (*iconv)(c2, c1, c0);
2978                 }
2979                 break;
2980             }
2981             break;
2982         case X0208:
2983         case X0213_1:
2984             if (ms_ucs_map_f &&
2985                 0x7F <= c2 && c2 <= 0x92 &&
2986                 0x21 <= c1 && c1 <= 0x7E) {
2987                 /* CP932 UDC */
                     /* NOTE(review): c1 <= 0x7E is guaranteed by the
                        enclosing condition, so this test looks unreachable. */
2988                 if(c1 == 0x7F) return 0;
2989                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2990                 c2 = 0;
2991             }
2992             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2993             break;
2994 #ifdef X0212_ENABLE
2995         case X0212:
2996             (*oconv)(PREFIX_EUCG3 | c2, c1);
2997             break;
2998 #endif /* X0212_ENABLE */
2999         case X0213_2:
3000             (*oconv)(PREFIX_EUCG3 | c2, c1);
3001             break;
3002         default:
3003             (*oconv)(input_mode, c1);  /* other special case */
3004         }
3005
             /* the character has been delivered; reset the pending bytes */
3006         c2 = 0;
3007         c0 = 0;
3008         continue;
3009         /* goto next_word */
3010     }
3011
3012     /* epilogue */
3013     (*iconv)(EOF, 0, 0);
3014     if (!input_codename)
3015     {
3016         if (is_8bit) {
                 /* pick the candidate with the lowest score */
3017             struct input_code *p = input_code_list;
3018             struct input_code *result = p;
3019             while (p->name){
3020                 if (p->score < result->score) result = p;
3021                 ++p;
3022             }
3023             set_input_codename(result->name);
3024 #ifdef CHECK_OPTION
3025             debug(result->name);
3026 #endif
3027         }
3028     }
3029     return 1;
3030 }
3031
3032 nkf_char
3033 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3034 {
3035     nkf_char ret, c3, c0;
3036     int hold_index;
3037
3038
3039     /** it must NOT be in the kanji shifte sequence      */
3040     /** it must NOT be written in JIS7                   */
3041     /** and it must be after 2 byte 8bit code            */
3042
3043     hold_count = 0;
3044     push_hold_buf(c2);
3045     push_hold_buf(c1);
3046
3047     while ((c1 = (*i_getc)(f)) != EOF) {
3048         if (c1 == ESC){
3049             (*i_ungetc)(c1,f);
3050             break;
3051         }
3052         code_status(c1);
3053         if (push_hold_buf(c1) == EOF || estab_f){
3054             break;
3055         }
3056     }
3057
3058     if (!estab_f){
3059         struct input_code *p = input_code_list;
3060         struct input_code *result = p;
3061         if (c1 == EOF){
3062             code_status(c1);
3063         }
3064         while (p->name){
3065             if (p->status_func && p->score < result->score){
3066                 result = p;
3067             }
3068             ++p;
3069         }
3070         set_iconv(TRUE, result->iconv_func);
3071     }
3072
3073
3074     /** now,
3075      ** 1) EOF is detected, or
3076      ** 2) Code is established, or
3077      ** 3) Buffer is FULL (but last word is pushed)
3078      **
3079      ** in 1) and 3) cases, we continue to use
3080      ** Kanji codes by oconv and leave estab_f unchanged.
3081      **/
3082
3083     ret = c1;
3084     hold_index = 0;
3085     while (hold_index < hold_count){
3086         c2 = hold_buf[hold_index++];
3087         if (c2 <= DEL
3088 #ifdef NUMCHAR_OPTION
3089             || is_unicode_capsule(c2)
3090 #endif
3091             ){
3092             (*iconv)(0, c2, 0);
3093             continue;
3094         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3095             (*iconv)(X0201, c2, 0);
3096             continue;
3097         }
3098         if (hold_index < hold_count){
3099             c1 = hold_buf[hold_index++];
3100         }else{
3101             c1 = (*i_getc)(f);
3102             if (c1 == EOF){
3103                 c3 = EOF;
3104                 break;
3105             }
3106             code_status(c1);
3107         }
3108         c0 = 0;
3109         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3110         case -2:
3111             /* 4 bytes UTF-8 */
3112             if (hold_index < hold_count){
3113                 c0 = hold_buf[hold_index++];
3114             } else if ((c0 = (*i_getc)(f)) == EOF) {
3115                 ret = EOF;
3116                 break;
3117             } else {
3118                 code_status(c0);
3119                 c0 <<= 8;
3120                 if (hold_index < hold_count){
3121                     c3 = hold_buf[hold_index++];
3122                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3123                     c0 = ret = EOF;
3124                     break;
3125                 } else {
3126                     code_status(c3);
3127                     (*iconv)(c2, c1, c0|c3);
3128                 }
3129             }
3130             break;
3131         case -1:
3132             /* 3 bytes EUC or UTF-8 */
3133             if (hold_index < hold_count){
3134                 c0 = hold_buf[hold_index++];
3135             } else if ((c0 = (*i_getc)(f)) == EOF) {
3136                 ret = EOF;
3137                 break;
3138             } else {
3139                 code_status(c0);
3140             }
3141             (*iconv)(c2, c1, c0);
3142             break;
3143         }
3144         if (c0 == EOF) break;
3145     }
3146     return ret;
3147 }
3148
3149 nkf_char push_hold_buf(nkf_char c2)
3150 {
3151     if (hold_count >= HOLD_SIZE*2)
3152         return (EOF);
3153     hold_buf[hold_count++] = (unsigned char)c2;
3154     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3155 }
3156
3157 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3158 {
3159 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3160     nkf_char val;
3161 #endif
3162     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3163 #ifdef SHIFTJIS_CP932
3164     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3165         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3166         if (val){
3167             c2 = val >> 8;
3168             c1 = val & 0xff;
3169         }
3170     }
3171     if (cp932inv_f
3172         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3173         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3174         if (c){
3175             c2 = c >> 8;
3176             c1 = c & 0xff;
3177         }
3178     }
3179 #endif /* SHIFTJIS_CP932 */
3180 #ifdef X0212_ENABLE
3181     if (!x0213_f && is_ibmext_in_sjis(c2)){
3182         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3183         if (val){
3184             if (val > 0x7FFF){
3185                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3186                 c1 = val & 0xff;
3187             }else{
3188                 c2 = val >> 8;
3189                 c1 = val & 0xff;
3190             }
3191             if (p2) *p2 = c2;
3192             if (p1) *p1 = c1;
3193             return 0;
3194         }
3195     }
3196 #endif
3197     if(c2 >= 0x80){
3198         if(x0213_f && c2 >= 0xF0){
3199             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3200                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3201             }else{ /* 78<=k<=94 */
3202                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3203                 if (0x9E < c1) c2++;
3204             }
3205         }else{
3206             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3207             if (0x9E < c1) c2++;
3208         }
3209         if (c1 < 0x9F)
3210             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3211         else {
3212             c1 = c1 - 0x7E;
3213         }
3214     }
3215
3216 #ifdef X0212_ENABLE
3217     c2 = x0212_unshift(c2);
3218 #endif
3219     if (p2) *p2 = c2;
3220     if (p1) *p1 = c1;
3221     return 0;
3222 }
3223
3224 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3225 {
3226     if (c2 == X0201) {
3227         c1 &= 0x7f;
3228     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3229         /* NOP */
3230     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3231         /* CP932 UDC */
3232         if(c1 == 0x7F) return 0;
3233         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3234         c2 = 0;
3235     } else {
3236         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3237         if (ret) return ret;
3238     }
3239     (*oconv)(c2, c1);
3240     return 0;
3241 }
3242
3243 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3244 {
3245     if (c2 == X0201) {
3246         c1 &= 0x7f;
3247 #ifdef X0212_ENABLE
3248     }else if (c2 == 0x8f){
3249         if (c0 == 0){
3250             return -1;
3251         }
3252         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3253             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3254             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3255             c2 = 0;
3256         } else {
3257             c2 = (c2 << 8) | (c1 & 0x7f);
3258             c1 = c0 & 0x7f;
3259 #ifdef SHIFTJIS_CP932
3260             if (cp51932_f){
3261                 nkf_char s2, s1;
3262                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3263                     s2e_conv(s2, s1, &c2, &c1);
3264                     if (c2 < 0x100){
3265                         c1 &= 0x7f;
3266                         c2 &= 0x7f;
3267                     }
3268                 }
3269             }
3270 #endif /* SHIFTJIS_CP932 */
3271         }
3272 #endif /* X0212_ENABLE */
3273     } else if (c2 == SSO){
3274         c2 = X0201;
3275         c1 &= 0x7f;
3276     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3277         /* NOP */
3278     } else {
3279         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3280             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3281             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3282             c2 = 0;
3283         } else {
3284             c1 &= 0x7f;
3285             c2 &= 0x7f;
3286 #ifdef SHIFTJIS_CP932
3287             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3288                 nkf_char s2, s1;
3289                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3290                     s2e_conv(s2, s1, &c2, &c1);
3291                     if (c2 < 0x100){
3292                         c1 &= 0x7f;
3293                         c2 &= 0x7f;
3294                     }
3295                 }
3296             }
3297 #endif /* SHIFTJIS_CP932 */
3298         }
3299     }
3300     (*oconv)(c2, c1);
3301     return 0;
3302 }
3303
3304 #ifdef UTF8_INPUT_ENABLE
3305 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3306 {
3307     nkf_char ret = 0;
3308
3309     if (!c1){
3310         *p2 = 0;
3311         *p1 = c2;
3312     }else if (0xc0 <= c2 && c2 <= 0xef) {
3313         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3314 #ifdef NUMCHAR_OPTION
3315         if (ret > 0){
3316             if (p2) *p2 = 0;
3317             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3318             ret = 0;
3319         }
3320 #endif
3321     }
3322     return ret;
3323 }
3324
3325 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3326 {
3327     nkf_char ret = 0;
3328     static const char w_iconv_utf8_1st_byte[] =
3329     { /* 0xC0 - 0xFF */
3330         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3331         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3332         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3333         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3334
3335     if (c2 < 0 || 0xff < c2) {
3336     }else if (c2 == 0) { /* 0 : 1 byte*/
3337         c0 = 0;
3338     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3339         return 0;
3340     } else{
3341         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3342         case 21:
3343             if (c1 < 0x80 || 0xBF < c1) return 0;
3344             break;
3345         case 30:
3346             if (c0 == 0) return -1;
3347             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3348                 return 0;
3349             break;
3350         case 31:
3351         case 33:
3352             if (c0 == 0) return -1;
3353             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3354                 return 0;
3355             break;
3356         case 32:
3357             if (c0 == 0) return -1;
3358             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3359                 return 0;
3360             break;
3361         case 40:
3362             if (c0 == 0) return -2;
3363             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3364                 return 0;
3365             break;
3366         case 41:
3367             if (c0 == 0) return -2;
3368             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3369                 return 0;
3370             break;
3371         case 42:
3372             if (c0 == 0) return -2;
3373             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3374                 return 0;
3375             break;
3376         default:
3377             return 0;
3378             break;
3379         }
3380     }
3381     if (c2 == 0 || c2 == EOF){
3382     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3383         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3384         c2 = 0;
3385     } else {
3386         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3387     }
3388     if (ret == 0){
3389         (*oconv)(c2, c1);
3390     }
3391     return ret;
3392 }
3393 #endif
3394
3395 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3396 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3397 {
3398     val &= VALUE_MASK;
3399     if (val < 0x80){
3400         *p2 = val;
3401         *p1 = 0;
3402         *p0 = 0;
3403     }else if (val < 0x800){
3404         *p2 = 0xc0 | (val >> 6);
3405         *p1 = 0x80 | (val & 0x3f);
3406         *p0 = 0;
3407     } else if (val <= NKF_INT32_C(0xFFFF)) {
3408         *p2 = 0xe0 | (val >> 12);
3409         *p1 = 0x80 | ((val >> 6) & 0x3f);
3410         *p0 = 0x80 | (val        & 0x3f);
3411     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3412         *p2 = 0xe0 |  (val >> 16);
3413         *p1 = 0x80 | ((val >> 12) & 0x3f);
3414         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3415     } else {
3416         *p2 = 0;
3417         *p1 = 0;
3418         *p0 = 0;
3419     }
3420 }
3421 #endif
3422
3423 #ifdef UTF8_INPUT_ENABLE
3424 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3425 {
3426     nkf_char val;
3427     if (c2 >= 0xf8) {
3428         val = -1;
3429     } else if (c2 >= 0xf0){
3430         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3431         val = (c2 & 0x0f) << 18;
3432         val |= (c1 & 0x3f) << 12;
3433         val |= (c0 & 0x3f00) >> 2;
3434         val |= (c0 & 0x3f);
3435     }else if (c2 >= 0xe0){
3436         val = (c2 & 0x0f) << 12;
3437         val |= (c1 & 0x3f) << 6;
3438         val |= (c0 & 0x3f);
3439     }else if (c2 >= 0xc0){
3440         val = (c2 & 0x1f) << 6;
3441         val |= (c1 & 0x3f);
3442     }else{
3443         val = c2;
3444     }
3445     return val;
3446 }
3447
3448 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3449 {
3450     nkf_char c2, c1, c0;
3451     nkf_char ret = 0;
3452     val &= VALUE_MASK;
3453     if (val < 0x80){
3454         *p2 = 0;
3455         *p1 = val;
3456     }else{
3457         w16w_conv(val, &c2, &c1, &c0);
3458         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3459 #ifdef NUMCHAR_OPTION
3460         if (ret > 0){
3461             *p2 = 0;
3462             *p1 = CLASS_UNICODE | val;
3463             ret = 0;
3464         }
3465 #endif
3466     }
3467     return ret;
3468 }
3469 #endif
3470
3471 #ifdef UTF8_INPUT_ENABLE
3472 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3473 {
3474     nkf_char ret = 0;
3475     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3476         (*oconv)(c2, c1);
3477         return 0;
3478     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3479         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3480             return -2;
3481         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3482         c2 = 0;
3483     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3484         /*
3485            return 2;
3486         */
3487         return 1;
3488     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3489     if (ret) return ret;
3490     (*oconv)(c2, c1);
3491     return 0;
3492 }
3493
3494 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3495 {
3496     int ret = 0;
3497
3498     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3499     } else if (is_unicode_bmp(c1)) {
3500         ret = w16e_conv(c1, &c2, &c1);
3501     } else {
3502         c2 = 0;
3503         c1 =  CLASS_UNICODE | c1;
3504     }
3505     if (ret) return ret;
3506     (*oconv)(c2, c1);
3507     return 0;
3508 }
3509
3510 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3511 {
3512     const unsigned short *const *pp;
3513     const unsigned short *const *const *ppp;
3514     static const char no_best_fit_chars_table_C2[] =
3515     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3516         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3517         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3518         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3519     static const char no_best_fit_chars_table_C2_ms[] =
3520     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3521         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3522         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3523         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3524     static const char no_best_fit_chars_table_932_C2[] =
3525     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3526         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3527         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3528         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3529     static const char no_best_fit_chars_table_932_C3[] =
3530     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3531         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3532         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3533         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3534     nkf_char ret = 0;
3535
3536     if(c2 < 0x80){
3537         *p2 = 0;
3538         *p1 = c2;
3539     }else if(c2 < 0xe0){
3540         if(no_best_fit_chars_f){
3541             if(ms_ucs_map_f == UCS_MAP_CP932){
3542                 switch(c2){
3543                 case 0xC2:
3544                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3545                     break;
3546                 case 0xC3:
3547                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3548                     break;
3549                 }
3550             }else if(!cp932inv_f){
3551                 switch(c2){
3552                 case 0xC2:
3553                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3554                     break;
3555                 case 0xC3:
3556                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3557                     break;
3558                 }
3559             }else if(ms_ucs_map_f == UCS_MAP_MS){
3560                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3561             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3562                 switch(c2){
3563                 case 0xC2:
3564                     switch(c1){
3565                     case 0xA2:
3566                     case 0xA3:
3567                     case 0xA5:
3568                     case 0xA6:
3569                     case 0xAC:
3570                     case 0xAF:
3571                     case 0xB8:
3572                         return 1;
3573                     }
3574                     break;
3575                 }
3576             }
3577         }
3578         pp =
3579             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3580             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3581             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3582             utf8_to_euc_2bytes;
3583         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3584     }else if(c0 < 0xF0){
3585         if(no_best_fit_chars_f){
3586             if(ms_ucs_map_f == UCS_MAP_CP932){
3587                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3588             }else if(ms_ucs_map_f == UCS_MAP_MS){
3589                 switch(c2){
3590                 case 0xE2:
3591                     switch(c1){
3592                     case 0x80:
3593                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3594                         break;
3595                     case 0x88:
3596                         if(c0 == 0x92) return 1;
3597                         break;
3598                     }
3599                     break;
3600                 case 0xE3:
3601                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3602                     break;
3603                 }
3604             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3605                 switch(c2){
3606                 case 0xE3:
3607                     switch(c1){
3608                     case 0x82:
3609                             if(c0 == 0x94) return 1;
3610                         break;
3611                     case 0x83:
3612                             if(c0 == 0xBB) return 1;
3613                         break;
3614                     }
3615                     break;
3616                 }
3617             }else{
3618                 switch(c2){
3619                 case 0xE2:
3620                     switch(c1){
3621                     case 0x80:
3622                         if(c0 == 0x95) return 1;
3623                         break;
3624                     case 0x88:
3625                         if(c0 == 0xA5) return 1;
3626                         break;
3627                     }
3628                     break;
3629                 case 0xEF:
3630                     switch(c1){
3631                     case 0xBC:
3632                         if(c0 == 0x8D) return 1;
3633                         break;
3634                     case 0xBD:
3635                         if(c0 == 0x9E && !cp932inv_f) return 1;
3636                         break;
3637                     case 0xBF:
3638                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3639                         break;
3640                     }
3641                     break;
3642                 }
3643             }
3644         }
3645         ppp =
3646             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3647             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3648             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3649             utf8_to_euc_3bytes;
3650         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3651     }else return -1;
3652 #ifdef SHIFTJIS_CP932
3653     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3654         nkf_char s2, s1;
3655         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3656             s2e_conv(s2, s1, p2, p1);
3657         }else{
3658             ret = 1;
3659         }
3660     }
3661 #endif
3662     return ret;
3663 }
3664
3665 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3666 {
3667     nkf_char c2;
3668     const unsigned short *p;
3669     unsigned short val;
3670
3671     if (pp == 0) return 1;
3672
3673     c1 -= 0x80;
3674     if (c1 < 0 || psize <= c1) return 1;
3675     p = pp[c1];
3676     if (p == 0)  return 1;
3677
3678     c0 -= 0x80;
3679     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3680     val = p[c0];
3681     if (val == 0) return 1;
3682     if (no_cp932ext_f && (
3683         (val>>8) == 0x2D || /* NEC special characters */
3684         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3685         )) return 1;
3686
3687     c2 = val >> 8;
3688    if (val > 0x7FFF){
3689         c2 &= 0x7f;
3690         c2 |= PREFIX_EUCG3;
3691     }
3692     if (c2 == SO) c2 = X0201;
3693     c1 = val & 0x7f;
3694     if (p2) *p2 = c2;
3695     if (p1) *p1 = c1;
3696     return 0;
3697 }
3698
3699 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3700 {
3701     int shift = 20;
3702     c &= VALUE_MASK;
3703     while(shift >= 0){
3704         if(c >= 1<<shift){
3705             while(shift >= 0){
3706                 (*f)(0, bin2hex(c>>shift));
3707                 shift -= 4;
3708             }
3709         }else{
3710             shift -= 4;
3711         }
3712     }
3713     return;
3714 }
3715
3716 void encode_fallback_html(nkf_char c)
3717 {
3718     (*oconv)(0, '&');
3719     (*oconv)(0, '#');
3720     c &= VALUE_MASK;
3721     if(c >= NKF_INT32_C(1000000))
3722         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3723     if(c >= NKF_INT32_C(100000))
3724         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3725     if(c >= 10000)
3726         (*oconv)(0, 0x30+(c/10000  )%10);
3727     if(c >= 1000)
3728         (*oconv)(0, 0x30+(c/1000   )%10);
3729     if(c >= 100)
3730         (*oconv)(0, 0x30+(c/100    )%10);
3731     if(c >= 10)
3732         (*oconv)(0, 0x30+(c/10     )%10);
3733     if(c >= 0)
3734         (*oconv)(0, 0x30+ c         %10);
3735     (*oconv)(0, ';');
3736     return;
3737 }
3738
3739 void encode_fallback_xml(nkf_char c)
3740 {
3741     (*oconv)(0, '&');
3742     (*oconv)(0, '#');
3743     (*oconv)(0, 'x');
3744     nkf_each_char_to_hex(oconv, c);
3745     (*oconv)(0, ';');
3746     return;
3747 }
3748
3749 void encode_fallback_java(nkf_char c)
3750 {
3751     (*oconv)(0, '\\');
3752     c &= VALUE_MASK;
3753     if(!is_unicode_bmp(c)){
3754         (*oconv)(0, 'U');
3755         (*oconv)(0, '0');
3756         (*oconv)(0, '0');
3757         (*oconv)(0, bin2hex(c>>20));
3758         (*oconv)(0, bin2hex(c>>16));
3759     }else{
3760         (*oconv)(0, 'u');
3761     }
3762     (*oconv)(0, bin2hex(c>>12));
3763     (*oconv)(0, bin2hex(c>> 8));
3764     (*oconv)(0, bin2hex(c>> 4));
3765     (*oconv)(0, bin2hex(c    ));
3766     return;
3767 }
3768
3769 void encode_fallback_perl(nkf_char c)
3770 {
3771     (*oconv)(0, '\\');
3772     (*oconv)(0, 'x');
3773     (*oconv)(0, '{');
3774     nkf_each_char_to_hex(oconv, c);
3775     (*oconv)(0, '}');
3776     return;
3777 }
3778
3779 void encode_fallback_subchar(nkf_char c)
3780 {
3781     c = unicode_subchar;
3782     (*oconv)((c>>8)&0xFF, c&0xFF);
3783     return;
3784 }
3785 #endif
3786
3787 #ifdef UTF8_OUTPUT_ENABLE
3788 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3789 {
3790     const unsigned short *p;
3791
3792     if (c2 == X0201) {
3793         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3794             switch (c1) {
3795             case 0x20:
3796                 return 0xA0;
3797             case 0x7D:
3798                 return 0xA9;
3799             }
3800         }
3801         p = euc_to_utf8_1byte;
3802 #ifdef X0212_ENABLE
3803     } else if (is_eucg3(c2)){
3804         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3805             return 0xA6;
3806         }
3807         c2 = (c2&0x7f) - 0x21;
3808         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3809             p = x0212_to_utf8_2bytes[c2];
3810         else
3811             return 0;
3812 #endif
3813     } else {
3814         c2 &= 0x7f;
3815         c2 = (c2&0x7f) - 0x21;
3816         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3817             p =
3818                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3819                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3820                 euc_to_utf8_2bytes_ms[c2];
3821         else
3822             return 0;
3823     }
3824     if (!p) return 0;
3825     c1 = (c1 & 0x7f) - 0x21;
3826     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3827         return p[c1];
3828     return 0;
3829 }
3830
3831 void w_oconv(nkf_char c2, nkf_char c1)
3832 {
3833     nkf_char c0;
3834     nkf_char val;
3835
3836     if (output_bom_f) {
3837         output_bom_f = FALSE;
3838         (*o_putc)('\357');
3839         (*o_putc)('\273');
3840         (*o_putc)('\277');
3841     }
3842
3843     if (c2 == EOF) {
3844         (*o_putc)(EOF);
3845         return;
3846     }
3847
3848 #ifdef NUMCHAR_OPTION
3849     if (c2 == 0 && is_unicode_capsule(c1)){
3850         val = c1 & VALUE_MASK;
3851         if (val < 0x80){
3852             (*o_putc)(val);
3853         }else if (val < 0x800){
3854             (*o_putc)(0xC0 | (val >> 6));
3855             (*o_putc)(0x80 | (val & 0x3f));
3856         } else if (val <= NKF_INT32_C(0xFFFF)) {
3857             (*o_putc)(0xE0 | (val >> 12));
3858             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3859             (*o_putc)(0x80 | (val        & 0x3f));
3860         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3861             (*o_putc)(0xF0 | ( val>>18));
3862             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3863             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3864             (*o_putc)(0x80 | ( val      & 0x3f));
3865         }
3866         return;
3867     }
3868 #endif
3869
3870     if (c2 == 0) {
3871         output_mode = ASCII;
3872         (*o_putc)(c1);
3873     } else if (c2 == ISO8859_1) {
3874         output_mode = UTF8;
3875         (*o_putc)(c1 | 0x080);
3876     } else {
3877         output_mode = UTF8;
3878         val = e2w_conv(c2, c1);
3879         if (val){
3880             w16w_conv(val, &c2, &c1, &c0);
3881             (*o_putc)(c2);
3882             if (c1){
3883                 (*o_putc)(c1);
3884                 if (c0) (*o_putc)(c0);
3885             }
3886         }
3887     }
3888 }
3889
3890 void w_oconv16(nkf_char c2, nkf_char c1)
3891 {
3892     if (output_bom_f) {
3893         output_bom_f = FALSE;
3894         if (output_endian == ENDIAN_LITTLE){
3895             (*o_putc)((unsigned char)'\377');
3896             (*o_putc)('\376');
3897         }else{
3898             (*o_putc)('\376');
3899             (*o_putc)((unsigned char)'\377');
3900         }
3901     }
3902
3903     if (c2 == EOF) {
3904         (*o_putc)(EOF);
3905         return;
3906     }
3907
3908     if (c2 == ISO8859_1) {
3909         c2 = 0;
3910         c1 |= 0x80;
3911 #ifdef NUMCHAR_OPTION
3912     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3913         if (is_unicode_bmp(c1)) {
3914             c2 = (c1 >> 8) & 0xff;
3915             c1 &= 0xff;
3916         } else {
3917             c1 &= VALUE_MASK;
3918             if (c1 <= UNICODE_MAX) {
3919                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3920                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3921                 if (output_endian == ENDIAN_LITTLE){
3922                     (*o_putc)(c2 & 0xff);
3923                     (*o_putc)((c2 >> 8) & 0xff);
3924                     (*o_putc)(c1 & 0xff);
3925                     (*o_putc)((c1 >> 8) & 0xff);
3926                 }else{
3927                     (*o_putc)((c2 >> 8) & 0xff);
3928                     (*o_putc)(c2 & 0xff);
3929                     (*o_putc)((c1 >> 8) & 0xff);
3930                     (*o_putc)(c1 & 0xff);
3931                 }
3932             }
3933             return;
3934         }
3935 #endif
3936     } else if (c2) {
3937         nkf_char val = e2w_conv(c2, c1);
3938         c2 = (val >> 8) & 0xff;
3939         c1 = val & 0xff;
3940         if (!val) return;
3941     }
3942     if (output_endian == ENDIAN_LITTLE){
3943         (*o_putc)(c1);
3944         (*o_putc)(c2);
3945     }else{
3946         (*o_putc)(c2);
3947         (*o_putc)(c1);
3948     }
3949 }
3950
3951 void w_oconv32(nkf_char c2, nkf_char c1)
3952 {
3953     if (output_bom_f) {
3954         output_bom_f = FALSE;
3955         if (output_endian == ENDIAN_LITTLE){
3956             (*o_putc)((unsigned char)'\377');
3957             (*o_putc)('\376');
3958             (*o_putc)('\000');
3959             (*o_putc)('\000');
3960         }else{
3961             (*o_putc)('\000');
3962             (*o_putc)('\000');
3963             (*o_putc)('\376');
3964             (*o_putc)((unsigned char)'\377');
3965         }
3966     }
3967
3968     if (c2 == EOF) {
3969         (*o_putc)(EOF);
3970         return;
3971     }
3972
3973     if (c2 == ISO8859_1) {
3974         c1 |= 0x80;
3975 #ifdef NUMCHAR_OPTION
3976     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3977         c1 &= VALUE_MASK;
3978 #endif
3979     } else if (c2) {
3980         c1 = e2w_conv(c2, c1);
3981         if (!c1) return;
3982     }
3983     if (output_endian == ENDIAN_LITTLE){
3984         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3985         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3986         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3987         (*o_putc)('\000');
3988     }else{
3989         (*o_putc)('\000');
3990         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3991         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3992         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3993     }
3994 }
3995 #endif
3996
3997 void e_oconv(nkf_char c2, nkf_char c1)
3998 {
3999 #ifdef NUMCHAR_OPTION
4000     if (c2 == 0 && is_unicode_capsule(c1)){
4001         w16e_conv(c1, &c2, &c1);
4002         if (c2 == 0 && is_unicode_capsule(c1)){
4003             c2 = c1 & VALUE_MASK;
4004             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4005                 /* eucJP-ms UDC */
4006                 c1 &= 0xFFF;
4007                 c2 = c1 / 94;
4008                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4009                 c1 = 0x21 + c1 % 94;
4010                 if (is_eucg3(c2)){
4011                     (*o_putc)(0x8f);
4012                     (*o_putc)((c2 & 0x7f) | 0x080);
4013                     (*o_putc)(c1 | 0x080);
4014                 }else{
4015                     (*o_putc)((c2 & 0x7f) | 0x080);
4016                     (*o_putc)(c1 | 0x080);
4017                 }
4018                 return;
4019             } else {
4020                 if (encode_fallback) (*encode_fallback)(c1);
4021                 return;
4022             }
4023         }
4024     }
4025 #endif
4026     if (c2 == EOF) {
4027         (*o_putc)(EOF);
4028         return;
4029     } else if (c2 == 0) {
4030         output_mode = ASCII;
4031         (*o_putc)(c1);
4032     } else if (c2 == X0201) {
4033         output_mode = JAPANESE_EUC;
4034         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4035     } else if (c2 == ISO8859_1) {
4036         output_mode = ISO8859_1;
4037         (*o_putc)(c1 | 0x080);
4038 #ifdef X0212_ENABLE
4039     } else if (is_eucg3(c2)){
4040         output_mode = JAPANESE_EUC;
4041 #ifdef SHIFTJIS_CP932
4042         if (!cp932inv_f){
4043             nkf_char s2, s1;
4044             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4045                 s2e_conv(s2, s1, &c2, &c1);
4046             }
4047         }
4048 #endif
4049         if (c2 == 0) {
4050             output_mode = ASCII;
4051             (*o_putc)(c1);
4052         }else if (is_eucg3(c2)){
4053             if (x0212_f){
4054                 (*o_putc)(0x8f);
4055                 (*o_putc)((c2 & 0x7f) | 0x080);
4056                 (*o_putc)(c1 | 0x080);
4057             }
4058         }else{
4059             (*o_putc)((c2 & 0x7f) | 0x080);
4060             (*o_putc)(c1 | 0x080);
4061         }
4062 #endif
4063     } else {
4064         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4065             set_iconv(FALSE, 0);
4066             return; /* too late to rescue this char */
4067         }
4068         output_mode = JAPANESE_EUC;
4069         (*o_putc)(c2 | 0x080);
4070         (*o_putc)(c1 | 0x080);
4071     }
4072 }
4073
4074 #ifdef X0212_ENABLE
4075 nkf_char x0212_shift(nkf_char c)
4076 {
4077     nkf_char ret = c;
4078     c &= 0x7f;
4079     if (is_eucg3(ret)){
4080         if (0x75 <= c && c <= 0x7f){
4081             ret = c + (0x109 - 0x75);
4082         }
4083     }else{
4084         if (0x75 <= c && c <= 0x7f){
4085             ret = c + (0x113 - 0x75);
4086         }
4087     }
4088     return ret;
4089 }
4090
4091
4092 nkf_char x0212_unshift(nkf_char c)
4093 {
4094     nkf_char ret = c;
4095     if (0x7f <= c && c <= 0x88){
4096         ret = c + (0x75 - 0x7f);
4097     }else if (0x89 <= c && c <= 0x92){
4098         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4099     }
4100     return ret;
4101 }
4102 #endif /* X0212_ENABLE */
4103
4104 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4105 {
4106     nkf_char ndx;
4107     if (is_eucg3(c2)){
4108         ndx = c2 & 0x7f;
4109         if (x0213_f){
4110             if((0x21 <= ndx && ndx <= 0x2F)){
4111                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4112                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4113                 return 0;
4114             }else if(0x6E <= ndx && ndx <= 0x7E){
4115                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4116                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4117                 return 0;
4118             }
4119             return 1;
4120         }
4121 #ifdef X0212_ENABLE
4122         else if(nkf_isgraph(ndx)){
4123             nkf_char val = 0;
4124             const unsigned short *ptr;
4125             ptr = x0212_shiftjis[ndx - 0x21];
4126             if (ptr){
4127                 val = ptr[(c1 & 0x7f) - 0x21];
4128             }
4129             if (val){
4130                 c2 = val >> 8;
4131                 c1 = val & 0xff;
4132                 if (p2) *p2 = c2;
4133                 if (p1) *p1 = c1;
4134                 return 0;
4135             }
4136             c2 = x0212_shift(c2);
4137         }
4138 #endif /* X0212_ENABLE */
4139     }
4140     if(0x7F < c2) return 1;
4141     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4142     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4143     return 0;
4144 }
4145
4146 void s_oconv(nkf_char c2, nkf_char c1)
4147 {
4148 #ifdef NUMCHAR_OPTION
4149     if (c2 == 0 && is_unicode_capsule(c1)){
4150         w16e_conv(c1, &c2, &c1);
4151         if (c2 == 0 && is_unicode_capsule(c1)){
4152             c2 = c1 & VALUE_MASK;
4153             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4154                 /* CP932 UDC */
4155                 c1 &= 0xFFF;
4156                 c2 = c1 / 188 + 0xF0;
4157                 c1 = c1 % 188;
4158                 c1 += 0x40 + (c1 > 0x3e);
4159                 (*o_putc)(c2);
4160                 (*o_putc)(c1);
4161                 return;
4162             } else {
4163                 if(encode_fallback)(*encode_fallback)(c1);
4164                 return;
4165             }
4166         }
4167     }
4168 #endif
4169     if (c2 == EOF) {
4170         (*o_putc)(EOF);
4171         return;
4172     } else if (c2 == 0) {
4173         output_mode = ASCII;
4174         (*o_putc)(c1);
4175     } else if (c2 == X0201) {
4176         output_mode = SHIFT_JIS;
4177         (*o_putc)(c1|0x80);
4178     } else if (c2 == ISO8859_1) {
4179         output_mode = ISO8859_1;
4180         (*o_putc)(c1 | 0x080);
4181 #ifdef X0212_ENABLE
4182     } else if (is_eucg3(c2)){
4183         output_mode = SHIFT_JIS;
4184         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4185             (*o_putc)(c2);
4186             (*o_putc)(c1);
4187         }
4188 #endif
4189     } else {
4190         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4191             set_iconv(FALSE, 0);
4192             return; /* too late to rescue this char */
4193         }
4194         output_mode = SHIFT_JIS;
4195         e2s_conv(c2, c1, &c2, &c1);
4196
4197 #ifdef SHIFTJIS_CP932
4198         if (cp932inv_f
4199             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4200