OSDN Git Service

* constant DEFAULT_NEWLINE added.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.154 2007/12/18 18:20:16 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-19"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #ifndef MIME_DECODE_DEFAULT
44 #define MIME_DECODE_DEFAULT STRICT_MIME
45 #endif
46 #ifndef X0201_DEFAULT
47 #define X0201_DEFAULT TRUE
48 #endif
49
/*
 * Newline emitted by the converters, selected at build time through
 * DEFAULT_NEWLINE: 0x0D0A (CR LF), 0x0D (CR) or 0x0A (LF).  When the macro
 * is not defined, or holds any other value, LF is used and DEFAULT_NEWLINE
 * is normalized to 0x0A.
 *
 * PUT_NEWLINE(func)   calls func(byte) once per byte of the newline.
 * OCONV_NEWLINE(func) calls func(0, byte) once per byte of the newline.
 */
#ifndef DEFAULT_NEWLINE
#define DEFAULT_NEWLINE 0x0A
#endif

#if DEFAULT_NEWLINE == 0x0D0A
#define PUT_NEWLINE(func) do {\
    func(0x0D);\
    func(0x0A);\
} while (0)
#define OCONV_NEWLINE(func) do {\
    func(0, 0x0D);\
    func(0, 0x0A);\
} while (0)
#elif DEFAULT_NEWLINE == 0x0D
#define PUT_NEWLINE(func) func(0x0D)
#define OCONV_NEWLINE(func) func(0, 0x0D)
#else
/* Drop whatever spelling/value was supplied before redefining, so the LF
 * fallback can never produce a conflicting macro redefinition. */
#undef DEFAULT_NEWLINE
#define DEFAULT_NEWLINE 0x0A
#define PUT_NEWLINE(func) func(0x0A)
#define OCONV_NEWLINE(func) func(0, 0x0A)
#endif
67
68 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
69 #define MSDOS
70 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
71 #define __WIN32__
72 #endif
73 #endif
74
75 #ifdef PERL_XS
76 #undef OVERWRITE
77 #endif
78
79 #ifndef PERL_XS
80 #include <stdio.h>
81 #endif
82
83 #include <stdlib.h>
84 #include <string.h>
85
86 #if defined(MSDOS) || defined(__OS2__)
87 #include <fcntl.h>
88 #include <io.h>
89 #if defined(_MSC_VER) || defined(__WATCOMC__)
90 #define mktemp _mktemp
91 #endif
92 #endif
93
94 #ifdef MSDOS
95 #ifdef LSI_C
96 #define setbinmode(fp) fsetbin(fp)
97 #elif defined(__DJGPP__)
98 #include <libc/dosio.h>
99 #define setbinmode(fp) djgpp_setbinmode(fp)
100 #else /* Microsoft C, Turbo C */
101 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
102 #endif
103 #else /* UNIX */
104 #define setbinmode(fp)
105 #endif
106
107 #if defined(__DJGPP__)
108 void  djgpp_setbinmode(FILE *fp)
109 {
110     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
111     int fd, m;
112     fd = fileno(fp);
113     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
114     __file_handle_set(fd, m);
115 }
116 #endif
117
118 #ifdef _IOFBF /* SysV and MSDOS, Windows */
119 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
120 #else /* BSD */
121 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
122 #endif
123
124 /*Borland C++ 4.5 EasyWin*/
125 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
126 #define         EASYWIN
127 #ifndef __WIN16__
128 #define __WIN16__
129 #endif
130 #include <windows.h>
131 #endif
132
133 #ifdef OVERWRITE
134 /* added by satoru@isoternet.org */
135 #if defined(__EMX__)
136 #include <sys/types.h>
137 #endif
138 #include <sys/stat.h>
139 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
140 #include <unistd.h>
141 #if defined(__WATCOMC__)
142 #include <sys/utime.h>
143 #else
144 #include <utime.h>
145 #endif
146 #else /* defined(MSDOS) */
147 #ifdef __WIN32__
148 #ifdef __BORLANDC__ /* BCC32 */
149 #include <utime.h>
150 #else /* !defined(__BORLANDC__) */
151 #include <sys/utime.h>
152 #endif /* (__BORLANDC__) */
153 #else /* !defined(__WIN32__) */
154 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
155 #include <sys/utime.h>
156 #elif defined(__TURBOC__) /* BCC */
157 #include <utime.h>
158 #elif defined(LSI_C) /* LSI C */
159 #endif /* (__WIN32__) */
160 #endif
161 #endif
162 #endif
163
164 #define         FALSE   0
165 #define         TRUE    1
166
167 /* state of output_mode and input_mode
168
169    c2           0 means ASCII
170                 X0201
171                 ISO8859_1
172                 X0208
173                 EOF      all termination
174    c1           32bit data
175
176  */
177
178 #define         ASCII           0
179 #define         X0208           1
180 #define         X0201           2
181 #define         ISO8859_1       8
182 #define         X0212      0x2844
183 #define         X0213_1    0x284F
184 #define         X0213_2    0x2850
185
186 /* Input Assumption */
187
188 #define         JIS_INPUT       4
189 #define         EUC_INPUT      16
190 #define         SJIS_INPUT      5
191 #define         LATIN1_INPUT    6
192 #define         FIXED_MIME      7
193 #define         STRICT_MIME     8
194
195 /* MIME ENCODE */
196
197 #define         ISO2022JP       9
198 #define         JAPANESE_EUC   10
199 #define         SHIFT_JIS      11
200
201 #define         UTF8           12
202 #define         UTF8_INPUT     13
203 #define         UTF16_INPUT    1015
204 #define         UTF32_INPUT    1017
205
206 /* byte order */
207
208 #define         ENDIAN_BIG      1234
209 #define         ENDIAN_LITTLE   4321
210 #define         ENDIAN_2143     2143
211 #define         ENDIAN_3412     3412
212
213 /* ASCII CODE */
214
215 #define         BS      0x08
216 #define         TAB     0x09
217 #define         LF      0x0a
218 #define         CR      0x0d
219 #define         ESC     0x1b
220 #define         SP      0x20
221 #define         AT      0x40
222 #define         SSP     0xa0
223 #define         DEL     0x7f
224 #define         SI      0x0f
225 #define         SO      0x0e
226 #define         SSO     0x8e
227 #define         SS3     0x8f
228 #define         CRLF    0x0D0A
229
/*
 * Locale-independent ASCII character classification helpers.
 *
 * Every macro argument is fully parenthesized so that expression arguments
 * (e.g. `x | y`, `a ? b : c`) are classified as a whole.  The argument is
 * still evaluated more than once: do not pass expressions with side effects.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low 4 bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second byte (bits 8..15) of c2, truncated to 16 bits, is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* CR, LF, or a character strictly between SP and DEL other than ? = _ ( ) . " */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))
252
/* Lead-byte ranges used by the CP932 mapping tables (inclusive bounds). */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END.
 * The argument is parenthesized so expression arguments are compared as a
 * whole; it is evaluated twice, so avoid side effects. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
258
259 #define         HOLD_SIZE       1024
260 #if defined(INT_IS_SHORT)
261 #define         IOBUF_SIZE      2048
262 #else
263 #define         IOBUF_SIZE      16384
264 #endif
265
266 #define         DEFAULT_J       'B'
267 #define         DEFAULT_R       'B'
268
269 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
270 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
271
272 #define         RANGE_NUM_MAX   18
273 #define         GETA1   0x22
274 #define         GETA2   0x2e
275
276
277 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
278 #define sizeof_euc_to_utf8_1byte 94
279 #define sizeof_euc_to_utf8_2bytes 94
280 #define sizeof_utf8_to_euc_C2 64
281 #define sizeof_utf8_to_euc_E5B8 64
282 #define sizeof_utf8_to_euc_2bytes 112
283 #define sizeof_utf8_to_euc_3bytes 16
284 #endif
285
286 /* MIME preprocessor */
287
288 #ifdef EASYWIN /*Easy Win */
289 extern POINT _BufferSize;
290 #endif
291
292 struct input_code{ /* one candidate input encoding; instances live in input_code_list */
293     char *name; /* encoding name, e.g. "EUC-JP"; left NULL in the list's terminating {0} entry */
294     nkf_char stat; /* NOTE(review): presumably the scanner state kept by status_func -- confirm */
295     nkf_char score; /* NOTE(review): presumably maintained via set_code_score()/clr_code_score() -- confirm */
296     nkf_char index; /* NOTE(review): presumably the number of entries in buf in use -- confirm */
297     nkf_char buf[3]; /* NOTE(review): presumably pending characters stored by status_push_ch() -- confirm */
298     void (*status_func)(struct input_code *, nkf_char); /* callback given this entry and one character; NULL for some entries */
299     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter associated with this encoding */
300     int _file_stat; /* initialized to 0 in input_code_list; usage not visible here */
301 };
302
303 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
304
305 #if !defined(PERL_XS) && !defined(WIN32DLL)
306 static  nkf_char     noconvert(FILE *f);
307 #endif
308 static  void    module_connection(void);
309 static  nkf_char     kanji_convert(FILE *f);
310 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
311 static  nkf_char     push_hold_buf(nkf_char c2);
312 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
313 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
314 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
315 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
316 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
317 /* UCS Mapping
318  * 0: Shift_JIS, eucJP-ascii
319  * 1: eucJP-ms
320  * 2: CP932, CP51932
321  * 3: CP10001
322  */
323 #define UCS_MAP_ASCII   0
324 #define UCS_MAP_MS      1
325 #define UCS_MAP_CP932   2
326 #define UCS_MAP_CP10001 3
327 static int ms_ucs_map_f = UCS_MAP_ASCII;
328 #endif
329 #ifdef UTF8_INPUT_ENABLE
330 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
331 static  int     no_cp932ext_f = FALSE;
332 /* ignore ZERO WIDTH NO-BREAK SPACE */
333 static  int     no_best_fit_chars_f = FALSE;
334 static  int     input_endian = ENDIAN_BIG;
335 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
336 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
337 static  void    encode_fallback_html(nkf_char c);
338 static  void    encode_fallback_xml(nkf_char c);
339 static  void    encode_fallback_java(nkf_char c);
340 static  void    encode_fallback_perl(nkf_char c);
341 static  void    encode_fallback_subchar(nkf_char c);
342 static  void    (*encode_fallback)(nkf_char c) = NULL;
343 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
344 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
345 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
346 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
347 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
348 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
349 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
350 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
351 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
352 static  void    w_status(struct input_code *, nkf_char);
353 #endif
354 #ifdef UTF8_OUTPUT_ENABLE
355 static  int     output_bom_f = FALSE;
356 static  int     output_endian = ENDIAN_BIG;
357 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
358 static  void    w_oconv(nkf_char c2,nkf_char c1);
359 static  void    w_oconv16(nkf_char c2,nkf_char c1);
360 static  void    w_oconv32(nkf_char c2,nkf_char c1);
361 #endif
362 static  void    e_oconv(nkf_char c2,nkf_char c1);
363 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
364 static  void    s_oconv(nkf_char c2,nkf_char c1);
365 static  void    j_oconv(nkf_char c2,nkf_char c1);
366 static  void    fold_conv(nkf_char c2,nkf_char c1);
367 static  void    nl_conv(nkf_char c2,nkf_char c1);
368 static  void    z_conv(nkf_char c2,nkf_char c1);
369 static  void    rot_conv(nkf_char c2,nkf_char c1);
370 static  void    hira_conv(nkf_char c2,nkf_char c1);
371 static  void    base64_conv(nkf_char c2,nkf_char c1);
372 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
373 static  void    no_connection(nkf_char c2,nkf_char c1);
374 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
375
376 static  void    code_score(struct input_code *ptr);
377 static  void    code_status(nkf_char c);
378
379 static  void    std_putc(nkf_char c);
380 static  nkf_char     std_getc(FILE *f);
381 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
382
383 static  nkf_char     broken_getc(FILE *f);
384 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
385
386 static  nkf_char     mime_begin(FILE *f);
387 static  nkf_char     mime_getc(FILE *f);
388 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
389
390 static  void    switch_mime_getc(void);
391 static  void    unswitch_mime_getc(void);
392 static  nkf_char     mime_begin_strict(FILE *f);
393 static  nkf_char     mime_getc_buf(FILE *f);
394 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
395 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
396
397 static  nkf_char     base64decode(nkf_char c);
398 static  void    mime_prechar(nkf_char c2, nkf_char c1);
399 static  void    mime_putc(nkf_char c);
400 static  void    open_mime(nkf_char c);
401 static  void    close_mime(void);
402 static  void    eof_mime(void);
403 static  void    mimeout_addchar(nkf_char c);
404 #ifndef PERL_XS
405 static  void    usage(void);
406 static  void    version(void);
407 static  void    show_configuration(void);
408 #endif
409 static  void    options(unsigned char *c);
410 static  void    reinit(void);
411
412 /* buffers */
413
414 #if !defined(PERL_XS) && !defined(WIN32DLL)
415 static unsigned char   stdibuf[IOBUF_SIZE];
416 static unsigned char   stdobuf[IOBUF_SIZE];
417 #endif
418 static unsigned char   hold_buf[HOLD_SIZE*2];
419 static int             hold_count = 0;
420
421 /* MIME preprocessor fifo */
422
423 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
424 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
425 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
426 static unsigned char           mime_buf[MIME_BUF_SIZE];
427 static unsigned int            mime_top = 0;
428 static unsigned int            mime_last = 0;  /* decoded */
429 static unsigned int            mime_input = 0; /* undecoded */
430 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
431
432 /* flags */
433 static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
434 static int             estab_f = FALSE;        /* NOTE(review): presumably "input code established" -- confirm */
435 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
436 static int             binmode_f = TRUE;       /* binary mode */
437 static int             rot_f = FALSE;          /* rot13/47 mode */
438 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
439 static int             input_f = FALSE;        /* non fixed input code  */
440 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
441 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
442 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
443 static int             mimebuf_f = FALSE;      /* MIME buffered input */
444 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
445 static int             iso8859_f = FALSE;      /* ISO8859 through */
446 static int             mimeout_f = FALSE;       /* base64 mode */
447 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
448 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
449
450 #ifdef UNICODE_NORMALIZATION
451 static int nfc_f = FALSE; /* NOTE(review): presumably enables the nfc_getc input stage -- confirm */
452 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
453 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc; /* matching unget hook; defaults to std_ungetc */
454 static nkf_char nfc_getc(FILE *f);
455 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
456 #endif
457
458 #ifdef INPUT_OPTION
459 static int cap_f = FALSE;
460 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
461 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
462 static nkf_char cap_getc(FILE *f);
463 static nkf_char cap_ungetc(nkf_char c,FILE *f);
464
465 static int url_f = FALSE;
466 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
467 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
468 static nkf_char url_getc(FILE *f);
469 static nkf_char url_ungetc(nkf_char c,FILE *f);
470 #endif
471
/*
 * 32-bit constants and the "unicode capsule" value layout.
 *
 * NKF_INT32_C(n) appends an L suffix when int is only 16 bits wide
 * (INT_IS_SHORT) so the constants below keep their full width.
 * A value whose top byte (CLASS_MASK) equals CLASS_UNICODE is a unicode
 * capsule; VALUE_MASK selects its low 24 bits.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* Arguments are parenthesized so that expression arguments such as
 * `CLASS_UNICODE | code` are masked as a whole. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
484
485 #ifdef NUMCHAR_OPTION
486 static int numchar_f = FALSE; /* NOTE(review): presumably enables the numchar_getc input stage -- confirm */
487 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc */
488 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc; /* matching unget hook; defaults to std_ungetc */
489 static nkf_char numchar_getc(FILE *f);
490 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
491 #endif
492
493 #ifdef CHECK_OPTION
494 static int noout_f = FALSE;
495 static void no_putc(nkf_char c);
496 static int debug_f = FALSE;
497 static void debug(const char *str);
498 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
499 #endif
500
501 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
502 #if !defined PERL_XS
503 static  void    print_guessed_code(char *filename);
504 #endif
505 static  void    set_input_codename(char *codename);
506
507 #ifdef EXEC_IO
508 static int exec_f = 0;
509 #endif
510
511 #ifdef SHIFTJIS_CP932
512 /* invert IBM extended characters to others */
513 static int cp51932_f = FALSE;
514
515 /* invert NEC-selected IBM extended characters to IBM extended characters */
516 static int cp932inv_f = TRUE;
517
518 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
519 #endif /* SHIFTJIS_CP932 */
520
521 #ifdef X0212_ENABLE
522 static int x0212_f = FALSE;
523 static nkf_char x0212_shift(nkf_char c);
524 static nkf_char x0212_unshift(nkf_char c);
525 #endif
526 static int x0213_f = FALSE;
527
528 static unsigned char prefix_table[256];
529
530 static void set_code_score(struct input_code *ptr, nkf_char score);
531 static void clr_code_score(struct input_code *ptr, nkf_char score);
532 static void status_disable(struct input_code *ptr);
533 static void status_push_ch(struct input_code *ptr, nkf_char c);
534 static void status_clear(struct input_code *ptr);
535 static void status_reset(struct input_code *ptr);
536 static void status_reinit(struct input_code *ptr);
537 static void status_check(struct input_code *ptr, nkf_char c);
538 static void e_status(struct input_code *, nkf_char);
539 static void s_status(struct input_code *, nkf_char);
540
541 struct input_code input_code_list[] = { /* fields: name, stat, score, index, buf, status_func, iconv_func, _file_stat */
542     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
543     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
544 #ifdef UTF8_INPUT_ENABLE
545     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
546     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0}, /* no status_func for this entry */
547     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0}, /* no status_func for this entry */
548 #endif
549     {0} /* all-zero terminator entry (name == NULL) */
550 };
551
552 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
553 static int              base64_count = 0;
554
555 /* X0208 -> ASCII converter */
556
557 /* fold parameter */
558 static int             f_line = 0;    /* chars in line */
559 static int             f_prev = 0;
560 static int             fold_preserve_f = FALSE; /* preserve new lines */
561 static int             fold_f  = FALSE;
562 static int             fold_len  = 0;
563
564 /* options */
565 static unsigned char   kanji_intro = DEFAULT_J;
566 static unsigned char   ascii_intro = DEFAULT_R;
567
568 /* Folding */
569
570 #define FOLD_MARGIN  10
571 #define DEFAULT_FOLD 60
572
573 static int             fold_margin  = FOLD_MARGIN;
574
575 /* converters */
576
577 #ifdef DEFAULT_CODE_JIS
578 #   define  DEFAULT_CONV j_oconv
579 #endif
580 #ifdef DEFAULT_CODE_SJIS
581 #   define  DEFAULT_CONV s_oconv
582 #endif
583 #ifdef DEFAULT_CODE_EUC
584 #   define  DEFAULT_CONV e_oconv
585 #endif
586 #ifdef DEFAULT_CODE_UTF8
587 #   define  DEFAULT_CONV w_oconv
588 #endif
589
590 /* process default */
591 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
592
593 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
594 /* s_iconv or oconv */
595 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
596
597 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
598 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
599 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
600 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
601 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
602 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
603 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
604
605 /* static redirections */
606
607 static  void   (*o_putc)(nkf_char c) = std_putc; /* general output; defaults to std_putc */
608
609 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
610 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
611
612 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of bgetc */
613 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
614
615 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
616
617 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
618 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
619
620 /* for strict mime */
621 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
622 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
623
624 /* Global states */
625 static int output_mode = ASCII,    /* output kanji mode */
626            input_mode =  ASCII,    /* input kanji mode */
627            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
628 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
629
630 /* X0201 / X0208 conversion tables */
631
632 /* X0201 kana conversion table */
633 /* 90-9F A0-DF */
634 static const unsigned char cv[]= {
635     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
636     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
637     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
638     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
639     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
640     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
641     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
642     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
643     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
644     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
645     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
646     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
647     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
648     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
649     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
650     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
651     0x00,0x00};
652
653
654 /* X0201 kana conversion table for dakuten */
655 /* 90-9F A0-DF */
656 static const unsigned char dv[]= {
657     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
660     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
661     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
662     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
663     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
664     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
665     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
666     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
668     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673     0x00,0x00};
674
675 /* X0201 kana conversion table for han-dakuten */
676 /* 90-9F A0-DF */
677 static const unsigned char ev[]= {
678     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
679     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
680     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
681     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
682     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
683     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
684     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
685     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
686     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
687     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
688     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
689     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
690     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
691     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
692     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694     0x00,0x00};
695
696
697 /* X0208 kigou conversion table */
698 /* 0x8140 - 0x819e */
699 static const unsigned char fv[] = {
700
701     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
702     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
703     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
704     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
705     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
706     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
707     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
708     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
709     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
710     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
711     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
712     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
713 } ;
714
715
716
717 static int             file_out_f = FALSE;
718 #ifdef OVERWRITE
719 static int             overwrite_f = FALSE;
720 static int             preserve_time_f = FALSE;
721 static int             backup_f = FALSE;
722 static char            *backup_suffix = "";
723 static char *get_backup_filename(const char *suffix, const char *filename);
724 #endif
725
726 static int nlmode_f = 0;   /* CR, LF, CRLF */
727 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
728 static nkf_char prev_cr = 0; /* CR or 0 */
729 #ifdef EASYWIN /*Easy Win */
730 static int             end_check;
731 #endif /*Easy Win */
732
733 #define STD_GC_BUFSIZE (256)
734 nkf_char std_gc_buf[STD_GC_BUFSIZE];
735 nkf_char std_gc_ndx;
736
737 #ifdef WIN32DLL
738 #include "nkf32dll.c"
739 #elif defined(PERL_XS)
740 #else /* WIN32DLL */
741 int main(int argc, char **argv)
742 {
743     FILE  *fin;
744     unsigned char  *cp;
745
746     char *outfname = NULL;
747     char *origfname;
748
749 #ifdef EASYWIN /*Easy Win */
750     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
751 #endif
752
753     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
754         cp = (unsigned char *)*argv;
755         options(cp);
756         if (guess_f) {
757 #ifdef CHECK_OPTION
758             int debug_f_back = debug_f;
759 #endif
760 #ifdef EXEC_IO
761             int exec_f_back = exec_f;
762 #endif
763 #ifdef X0212_ENABLE
764             int x0212_f_back = x0212_f;
765 #endif
766             int x0213_f_back = x0213_f;
767             int guess_f_back = guess_f;
768             reinit();
769             guess_f = guess_f_back;
770             mime_f = FALSE;
771 #ifdef CHECK_OPTION
772             debug_f = debug_f_back;
773 #endif
774 #ifdef EXEC_IO
775             exec_f = exec_f_back;
776 #endif
777 #ifdef X0212_ENABLE
778             x0212_f = x0212_f_back;
779 #endif
780             x0213_f = x0213_f_back;
781         }
782 #ifdef EXEC_IO
783         if (exec_f){
784             int fds[2], pid;
785             if (pipe(fds) < 0 || (pid = fork()) < 0){
786                 abort();
787             }
788             if (pid == 0){
789                 if (exec_f > 0){
790                     close(fds[0]);
791                     dup2(fds[1], 1);
792                 }else{
793                     close(fds[1]);
794                     dup2(fds[0], 0);
795                 }
796                 execvp(argv[1], &argv[1]);
797             }
798             if (exec_f > 0){
799                 close(fds[1]);
800                 dup2(fds[0], 0);
801             }else{
802                 close(fds[0]);
803                 dup2(fds[1], 1);
804             }
805             argc = 0;
806             break;
807         }
808 #endif
809     }
810
811     if (binmode_f == TRUE)
812 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
813     if (freopen("","wb",stdout) == NULL)
814         return (-1);
815 #else
816     setbinmode(stdout);
817 #endif
818
819     if (unbuf_f)
820       setbuf(stdout, (char *) NULL);
821     else
822       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
823
824     if (argc == 0) {
825       if (binmode_f == TRUE)
826 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
827       if (freopen("","rb",stdin) == NULL) return (-1);
828 #else
829       setbinmode(stdin);
830 #endif
831       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
832       if (nop_f)
833           noconvert(stdin);
834       else {
835           kanji_convert(stdin);
836           if (guess_f) print_guessed_code(NULL);
837       }
838     } else {
839       int nfiles = argc;
840         int is_argument_error = FALSE;
841       while (argc--) {
842             input_codename = NULL;
843             input_newline = 0;
844 #ifdef CHECK_OPTION
845             iconv_for_check = 0;
846 #endif
847           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
848               perror(*--argv);
849                 *argv++;
850                 is_argument_error = TRUE;
851                 continue;
852           } else {
853 #ifdef OVERWRITE
854               int fd = 0;
855               int fd_backup = 0;
856 #endif
857
858 /* reopen file for stdout */
859               if (file_out_f == TRUE) {
860 #ifdef OVERWRITE
861                   if (overwrite_f){
862                       outfname = malloc(strlen(origfname)
863                                         + strlen(".nkftmpXXXXXX")
864                                         + 1);
865                       if (!outfname){
866                           perror(origfname);
867                           return -1;
868                       }
869                       strcpy(outfname, origfname);
870 #ifdef MSDOS
871                       {
872                           int i;
873                           for (i = strlen(outfname); i; --i){
874                               if (outfname[i - 1] == '/'
875                                   || outfname[i - 1] == '\\'){
876                                   break;
877                               }
878                           }
879                           outfname[i] = '\0';
880                       }
881                       strcat(outfname, "ntXXXXXX");
882                       mktemp(outfname);
883                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
884                                 S_IREAD | S_IWRITE);
885 #else
886                       strcat(outfname, ".nkftmpXXXXXX");
887                       fd = mkstemp(outfname);
888 #endif
889                       if (fd < 0
890                           || (fd_backup = dup(fileno(stdout))) < 0
891                           || dup2(fd, fileno(stdout)) < 0
892                           ){
893                           perror(origfname);
894                           return -1;
895                       }
896                   }else
897 #endif
898                   if(argc == 1) {
899                       outfname = *argv++;
900                       argc--;
901                   } else {
902                       outfname = "nkf.out";
903                   }
904
905                   if(freopen(outfname, "w", stdout) == NULL) {
906                       perror (outfname);
907                       return (-1);
908                   }
909                   if (binmode_f == TRUE) {
910 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
911                       if (freopen("","wb",stdout) == NULL)
912                            return (-1);
913 #else
914                       setbinmode(stdout);
915 #endif
916                   }
917               }
918               if (binmode_f == TRUE)
919 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
920                  if (freopen("","rb",fin) == NULL)
921                     return (-1);
922 #else
923                  setbinmode(fin);
924 #endif
925               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
926               if (nop_f)
927                   noconvert(fin);
928               else {
929                   char *filename = NULL;
930                   kanji_convert(fin);
931                   if (nfiles > 1) filename = origfname;
932                   if (guess_f) print_guessed_code(filename);
933               }
934               fclose(fin);
935 #ifdef OVERWRITE
936               if (overwrite_f) {
937                   struct stat     sb;
938 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
939                   time_t tb[2];
940 #else
941                   struct utimbuf  tb;
942 #endif
943
944                   fflush(stdout);
945                   close(fd);
946                   if (dup2(fd_backup, fileno(stdout)) < 0){
947                       perror("dup2");
948                   }
949                   if (stat(origfname, &sb)) {
950                       fprintf(stderr, "Can't stat %s\n", origfname);
951                   }
952                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
953                   if (chmod(outfname, sb.st_mode)) {
954                       fprintf(stderr, "Can't set permission %s\n", outfname);
955                   }
956
957                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
958                     if(preserve_time_f){
959 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
960                         tb[0] = tb[1] = sb.st_mtime;
961                         if (utime(outfname, tb)) {
962                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
963                         }
964 #else
965                         tb.actime  = sb.st_atime;
966                         tb.modtime = sb.st_mtime;
967                         if (utime(outfname, &tb)) {
968                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
969                         }
970 #endif
971                     }
972                     if(backup_f){
973                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
974 #ifdef MSDOS
975                         unlink(backup_filename);
976 #endif
977                         if (rename(origfname, backup_filename)) {
978                             perror(backup_filename);
979                             fprintf(stderr, "Can't rename %s to %s\n",
980                                     origfname, backup_filename);
981                         }
982                     }else{
983 #ifdef MSDOS
984                         if (unlink(origfname)){
985                             perror(origfname);
986                         }
987 #endif
988                     }
989                   if (rename(outfname, origfname)) {
990                       perror(origfname);
991                       fprintf(stderr, "Can't rename %s to %s\n",
992                               outfname, origfname);
993                   }
994                   free(outfname);
995               }
996 #endif
997           }
998       }
999         if (is_argument_error)
1000             return(-1);
1001     }
1002 #ifdef EASYWIN /*Easy Win */
1003     if (file_out_f == FALSE)
1004         scanf("%d",&end_check);
1005     else
1006         fclose(stdout);
1007 #else /* for Other OS */
1008     if (file_out_f == TRUE)
1009         fclose(stdout);
1010 #endif /*Easy Win */
1011     return (0);
1012 }
1013 #endif /* WIN32DLL */
1014
1015 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * suffix   - backup suffix.  When it contains no '*', the result is
 *            `filename` immediately followed by `suffix`.  When it
 *            contains one or more '*', it is used as a template and
 *            every '*' is replaced by `filename`.
 * filename - name of the file being backed up.
 *
 * Returns a newly malloc()ed, NUL-terminated string owned by the caller
 * (release it with free()), or NULL after reporting through perror()
 * when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t copies;
    size_t i, j;

    for (i = 0; i < suffix_length; i++){
        if (suffix[i] == '*') asterisk_count++;
    }

    /*
     * Without '*' the file name appears exactly once (as a prefix);
     * otherwise each '*' gives up its own byte and is replaced by one
     * copy of the file name.
     */
    copies = asterisk_count ? asterisk_count : 1;
    backup_filename = malloc((suffix_length - asterisk_count)
                             + copies * filename_length
                             + 1);
    if (!backup_filename){
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count == 0){
        /* plain suffix: "<filename><suffix>" (copy includes the NUL) */
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /* template: copy the suffix, expanding each '*' to the file name */
    for (i = 0, j = 0; i < suffix_length; i++){
        if (suffix[i] == '*'){
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        }else{
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1054 #endif
1055
/*
 * Table of long ("--name") options recognised by options().
 *
 *   name  - option text following the leading "--".  A trailing '='
 *           marks an option whose value follows the '='.
 *   alias - short-option letters that options() parses in place of the
 *           long option.  An empty string means the option has no
 *           short form and is handled by name inside options().
 *
 * options() searches the table from the top and takes the first entry
 * that matches, so the order of the entries is kept as is.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    /* input / output encoding by name */
    {"ic=",               ""},
    {"oc=",               ""},

    /* shorthands for combinations of short options */
    {"base64",            "jMB"},
    {"euc",               "e"},
    {"euc-input",         "E"},
    {"fj",                "jm"},
    {"help",              "v"},
    {"jis",               "j"},
    {"jis-input",         "J"},
    {"mac",               "sLm"},
    {"mime",              "jM"},
    {"mime-input",        "m"},
    {"msdos",             "sLw"},
    {"sjis",              "s"},
    {"sjis-input",        "S"},
    {"unix",              "eLu"},
    {"version",           "V"},
    {"windows",           "sLw"},
    {"hiragana",          "h1"},
    {"katakana",          "h2"},
    {"katakana-hiragana", "h3"},
    {"guess=",            ""},
    {"guess",             "g1"},
    {"cp932",             ""},
    {"no-cp932",          ""},
#ifdef X0212_ENABLE
    {"x0212",             ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8",              "w"},
    {"utf16",             "w16"},
    {"ms-ucs-map",        ""},
    {"fb-skip",           ""},
    {"fb-html",           ""},
    {"fb-xml",            ""},
    {"fb-perl",           ""},
    {"fb-java",           ""},
    {"fb-subchar",        ""},
    {"fb-subchar=",       ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input",        "W"},
    {"utf16-input",       "W16"},
    {"no-cp932ext",       ""},
    {"no-best-fit-chars", ""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input",     ""},
#endif
#ifdef OVERWRITE
    {"overwrite",         ""},
    {"overwrite=",        ""},
    {"in-place",          ""},
    {"in-place=",         ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input",         ""},
    {"url-input",         ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input",     ""},
#endif
#ifdef CHECK_OPTION
    {"no-output",         ""},
    {"debug",             ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv",          ""},
#endif
#ifdef EXEC_IO
    {"exec-in",           ""},
    {"exec-out",          ""},
#endif
    {"prefix=",           ""},
};

/*
 * Set to 1 by options() when it meets a bare "--"; from then on
 * options() returns at once, ignoring the rest of the arguments.
 */
static int option_mode = 0;
1137
1138 void options(unsigned char *cp)
1139 {
1140     nkf_char i, j;
1141     unsigned char *p;
1142     unsigned char *cp_back = NULL;
1143     char codeset[32];
1144
1145     if (option_mode==1)
1146         return;
1147     while(*cp && *cp++!='-');
1148     while (*cp || cp_back) {
1149         if(!*cp){
1150             cp = cp_back;
1151             cp_back = NULL;
1152             continue;
1153         }
1154         p = 0;
1155         switch (*cp++) {
1156         case '-':  /* literal options */
1157             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1158                 option_mode = 1;
1159                 return;
1160             }
1161             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1162                 p = (unsigned char *)long_option[i].name;
1163                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1164                 if (*p == cp[j] || cp[j] == SP){
1165                     p = &cp[j] + 1;
1166                     break;
1167                 }
1168                 p = 0;
1169             }
1170             if (p == 0) {
1171                 fprintf(stderr, "unknown long option: --%s\n", cp);
1172                 return;
1173             }
1174             while(*cp && *cp != SP && cp++);
1175             if (long_option[i].alias[0]){
1176                 cp_back = cp;
1177                 cp = (unsigned char *)long_option[i].alias;
1178             }else{
1179                 if (strcmp(long_option[i].name, "ic=") == 0){
1180                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1181                         codeset[i] = nkf_toupper(p[i]);
1182                     }
1183                     codeset[i] = 0;
1184                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1185                         input_f = JIS_INPUT;
1186                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1187                       strcmp(codeset, "CP50220") == 0 ||
1188                       strcmp(codeset, "CP50221") == 0 ||
1189                       strcmp(codeset, "CP50222") == 0){
1190                         input_f = JIS_INPUT;
1191 #ifdef SHIFTJIS_CP932
1192                         cp51932_f = TRUE;
1193 #endif
1194 #ifdef UTF8_OUTPUT_ENABLE
1195                         ms_ucs_map_f = UCS_MAP_CP932;
1196 #endif
1197                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1198                         input_f = JIS_INPUT;
1199 #ifdef X0212_ENABLE
1200                         x0212_f = TRUE;
1201 #endif
1202                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1203                         input_f = JIS_INPUT;
1204 #ifdef X0212_ENABLE
1205                         x0212_f = TRUE;
1206 #endif
1207                         x0213_f = TRUE;
1208                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1209                         input_f = SJIS_INPUT;
1210                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1211                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1212                              strcmp(codeset, "CP932") == 0 ||
1213                              strcmp(codeset, "MS932") == 0){
1214                         input_f = SJIS_INPUT;
1215 #ifdef SHIFTJIS_CP932
1216                         cp51932_f = TRUE;
1217 #endif
1218 #ifdef UTF8_OUTPUT_ENABLE
1219                         ms_ucs_map_f = UCS_MAP_CP932;
1220 #endif
1221                     }else if(strcmp(codeset, "CP10001") == 0){
1222                         input_f = SJIS_INPUT;
1223 #ifdef SHIFTJIS_CP932
1224                         cp51932_f = TRUE;
1225 #endif
1226 #ifdef UTF8_OUTPUT_ENABLE
1227                         ms_ucs_map_f = UCS_MAP_CP10001;
1228 #endif
1229                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1230                              strcmp(codeset, "EUC-JP") == 0){
1231                         input_f = EUC_INPUT;
1232                     }else if(strcmp(codeset, "CP51932") == 0){
1233                         input_f = EUC_INPUT;
1234 #ifdef SHIFTJIS_CP932
1235                         cp51932_f = TRUE;
1236 #endif
1237 #ifdef UTF8_OUTPUT_ENABLE
1238                         ms_ucs_map_f = UCS_MAP_CP932;
1239 #endif
1240                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1241                              strcmp(codeset, "EUCJP-MS") == 0 ||
1242                              strcmp(codeset, "EUCJPMS") == 0){
1243                         input_f = EUC_INPUT;
1244 #ifdef SHIFTJIS_CP932
1245                         cp51932_f = FALSE;
1246 #endif
1247 #ifdef UTF8_OUTPUT_ENABLE
1248                         ms_ucs_map_f = UCS_MAP_MS;
1249 #endif
1250                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1251                              strcmp(codeset, "EUCJP-ASCII") == 0){
1252                         input_f = EUC_INPUT;
1253 #ifdef SHIFTJIS_CP932
1254                         cp51932_f = FALSE;
1255 #endif
1256 #ifdef UTF8_OUTPUT_ENABLE
1257                         ms_ucs_map_f = UCS_MAP_ASCII;
1258 #endif
1259                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1260                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1261                         input_f = SJIS_INPUT;
1262                         x0213_f = TRUE;
1263 #ifdef SHIFTJIS_CP932
1264                         cp51932_f = FALSE;
1265 #endif
1266                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1267                              strcmp(codeset, "EUC-JIS-2004") == 0){
1268                         input_f = EUC_INPUT;
1269                         x0213_f = TRUE;
1270 #ifdef SHIFTJIS_CP932
1271                         cp51932_f = FALSE;
1272 #endif
1273 #ifdef UTF8_INPUT_ENABLE
1274                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1275                              strcmp(codeset, "UTF-8N") == 0 ||
1276                              strcmp(codeset, "UTF-8-BOM") == 0){
1277                         input_f = UTF8_INPUT;
1278 #ifdef UNICODE_NORMALIZATION
1279                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1280                              strcmp(codeset, "UTF-8-MAC") == 0){
1281                         input_f = UTF8_INPUT;
1282                         nfc_f = TRUE;
1283 #endif
1284                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1285                              strcmp(codeset, "UTF-16BE") == 0 ||
1286                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1287                         input_f = UTF16_INPUT;
1288                         input_endian = ENDIAN_BIG;
1289                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1290                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1291                         input_f = UTF16_INPUT;
1292                         input_endian = ENDIAN_LITTLE;
1293                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1294                              strcmp(codeset, "UTF-32BE") == 0 ||
1295                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1296                         input_f = UTF32_INPUT;
1297                         input_endian = ENDIAN_BIG;
1298                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1299                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1300                         input_f = UTF32_INPUT;
1301                         input_endian = ENDIAN_LITTLE;
1302 #endif
1303                     } else {
1304                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1305                     }
1306                     continue;
1307                 }
1308                 if (strcmp(long_option[i].name, "oc=") == 0){
1309                     x0201_f = FALSE;
1310                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1311                         codeset[i] = nkf_toupper(p[i]);
1312                     }
1313                     codeset[i] = 0;
1314                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1315                         output_conv = j_oconv;
1316                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1317                         output_conv = j_oconv;
1318                         no_cp932ext_f = TRUE;
1319 #ifdef SHIFTJIS_CP932
1320                         cp932inv_f = FALSE;
1321 #endif
1322 #ifdef UTF8_OUTPUT_ENABLE
1323                         ms_ucs_map_f = UCS_MAP_CP932;
1324 #endif
1325                     }else if(strcmp(codeset, "CP50220") == 0){
1326                         output_conv = j_oconv;
1327                         x0201_f = TRUE;
1328 #ifdef SHIFTJIS_CP932
1329                         cp932inv_f = FALSE;
1330 #endif
1331 #ifdef UTF8_OUTPUT_ENABLE
1332                         ms_ucs_map_f = UCS_MAP_CP932;
1333 #endif
1334                     }else if(strcmp(codeset, "CP50221") == 0){
1335                         output_conv = j_oconv;
1336 #ifdef SHIFTJIS_CP932
1337                         cp932inv_f = FALSE;
1338 #endif
1339 #ifdef UTF8_OUTPUT_ENABLE
1340                         ms_ucs_map_f = UCS_MAP_CP932;
1341 #endif
1342                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1343                         output_conv = j_oconv;
1344 #ifdef X0212_ENABLE
1345                         x0212_f = TRUE;
1346 #endif
1347 #ifdef SHIFTJIS_CP932
1348                         cp932inv_f = FALSE;
1349 #endif
1350                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1351                         output_conv = j_oconv;
1352 #ifdef X0212_ENABLE
1353                         x0212_f = TRUE;
1354 #endif
1355                         x0213_f = TRUE;
1356 #ifdef SHIFTJIS_CP932
1357                         cp932inv_f = FALSE;
1358 #endif
1359                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1360                         output_conv = s_oconv;
1361                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1362                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1363                              strcmp(codeset, "CP932") == 0 ||
1364                              strcmp(codeset, "MS932") == 0){
1365                         output_conv = s_oconv;
1366 #ifdef UTF8_OUTPUT_ENABLE
1367                         ms_ucs_map_f = UCS_MAP_CP932;
1368 #endif
1369                     }else if(strcmp(codeset, "CP10001") == 0){
1370                         output_conv = s_oconv;
1371 #ifdef UTF8_OUTPUT_ENABLE
1372                         ms_ucs_map_f = UCS_MAP_CP10001;
1373 #endif
1374                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1375                              strcmp(codeset, "EUC-JP") == 0){
1376                         output_conv = e_oconv;
1377                     }else if(strcmp(codeset, "CP51932") == 0){
1378                         output_conv = e_oconv;
1379 #ifdef SHIFTJIS_CP932
1380                         cp932inv_f = FALSE;
1381 #endif
1382 #ifdef UTF8_OUTPUT_ENABLE
1383                         ms_ucs_map_f = UCS_MAP_CP932;
1384 #endif
1385                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1386                              strcmp(codeset, "EUCJP-MS") == 0 ||
1387                              strcmp(codeset, "EUCJPMS") == 0){
1388                         output_conv = e_oconv;
1389 #ifdef X0212_ENABLE
1390                         x0212_f = TRUE;
1391 #endif
1392 #ifdef UTF8_OUTPUT_ENABLE
1393                         ms_ucs_map_f = UCS_MAP_MS;
1394 #endif
1395                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1396                              strcmp(codeset, "EUCJP-ASCII") == 0){
1397                         output_conv = e_oconv;
1398 #ifdef X0212_ENABLE
1399                         x0212_f = TRUE;
1400 #endif
1401 #ifdef UTF8_OUTPUT_ENABLE
1402                         ms_ucs_map_f = UCS_MAP_ASCII;
1403 #endif
1404                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1405                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1406                         output_conv = s_oconv;
1407                         x0213_f = TRUE;
1408 #ifdef SHIFTJIS_CP932
1409                         cp932inv_f = FALSE;
1410 #endif
1411                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1412                              strcmp(codeset, "EUC-JIS-2004") == 0){
1413                         output_conv = e_oconv;
1414 #ifdef X0212_ENABLE
1415                         x0212_f = TRUE;
1416 #endif
1417                         x0213_f = TRUE;
1418 #ifdef SHIFTJIS_CP932
1419                         cp932inv_f = FALSE;
1420 #endif
1421 #ifdef UTF8_OUTPUT_ENABLE
1422                     }else if(strcmp(codeset, "UTF-8") == 0){
1423                         output_conv = w_oconv;
1424                     }else if(strcmp(codeset, "UTF-8N") == 0){
1425                         output_conv = w_oconv;
1426                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1427                         output_conv = w_oconv;
1428                         output_bom_f = TRUE;
1429                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1430                         output_conv = w_oconv16;
1431                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1432                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1433                         output_conv = w_oconv16;
1434                         output_bom_f = TRUE;
1435                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1436                         output_conv = w_oconv16;
1437                         output_endian = ENDIAN_LITTLE;
1438                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1439                         output_conv = w_oconv16;
1440                         output_endian = ENDIAN_LITTLE;
1441                         output_bom_f = TRUE;
1442                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1443                              strcmp(codeset, "UTF-32BE") == 0){
1444                         output_conv = w_oconv32;
1445                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1446                         output_conv = w_oconv32;
1447                         output_bom_f = TRUE;
1448                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1449                         output_conv = w_oconv32;
1450                         output_endian = ENDIAN_LITTLE;
1451                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1452                         output_conv = w_oconv32;
1453                         output_endian = ENDIAN_LITTLE;
1454                         output_bom_f = TRUE;
1455 #endif
1456                     } else {
1457                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1458                     }
1459                     continue;
1460                 }
1461                 if (strcmp(long_option[i].name, "guess=") == 0){
1462                     if (p[0] == '1') {
1463                         guess_f = 2;
1464                     } else {
1465                         guess_f = 1;
1466                     }
1467                     continue;
1468                 }
1469 #ifdef OVERWRITE
1470                 if (strcmp(long_option[i].name, "overwrite") == 0){
1471                     file_out_f = TRUE;
1472                     overwrite_f = TRUE;
1473                     preserve_time_f = TRUE;
1474                     continue;
1475                 }
1476                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1477                     file_out_f = TRUE;
1478                     overwrite_f = TRUE;
1479                     preserve_time_f = TRUE;
1480                     backup_f = TRUE;
1481                     backup_suffix = malloc(strlen((char *) p) + 1);
1482                     strcpy(backup_suffix, (char *) p);
1483                     continue;
1484                 }
1485                 if (strcmp(long_option[i].name, "in-place") == 0){
1486                     file_out_f = TRUE;
1487                     overwrite_f = TRUE;
1488                     preserve_time_f = FALSE;
1489                     continue;
1490                 }
1491                 if (strcmp(long_option[i].name, "in-place=") == 0){
1492                     file_out_f = TRUE;
1493                     overwrite_f = TRUE;
1494                     preserve_time_f = FALSE;
1495                     backup_f = TRUE;
1496                     backup_suffix = malloc(strlen((char *) p) + 1);
1497                     strcpy(backup_suffix, (char *) p);
1498                     continue;
1499                 }
1500 #endif
1501 #ifdef INPUT_OPTION
1502                 if (strcmp(long_option[i].name, "cap-input") == 0){
1503                     cap_f = TRUE;
1504                     continue;
1505                 }
1506                 if (strcmp(long_option[i].name, "url-input") == 0){
1507                     url_f = TRUE;
1508                     continue;
1509                 }
1510 #endif
1511 #ifdef NUMCHAR_OPTION
1512                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1513                     numchar_f = TRUE;
1514                     continue;
1515                 }
1516 #endif
1517 #ifdef CHECK_OPTION
1518                 if (strcmp(long_option[i].name, "no-output") == 0){
1519                     noout_f = TRUE;
1520                     continue;
1521                 }
1522                 if (strcmp(long_option[i].name, "debug") == 0){
1523                     debug_f = TRUE;
1524                     continue;
1525                 }
1526 #endif
1527                 if (strcmp(long_option[i].name, "cp932") == 0){
1528 #ifdef SHIFTJIS_CP932
1529                     cp51932_f = TRUE;
1530                     cp932inv_f = TRUE;
1531 #endif
1532 #ifdef UTF8_OUTPUT_ENABLE
1533                     ms_ucs_map_f = UCS_MAP_CP932;
1534 #endif
1535                     continue;
1536                 }
1537                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1538 #ifdef SHIFTJIS_CP932
1539                     cp51932_f = FALSE;
1540                     cp932inv_f = FALSE;
1541 #endif
1542 #ifdef UTF8_OUTPUT_ENABLE
1543                     ms_ucs_map_f = UCS_MAP_ASCII;
1544 #endif
1545                     continue;
1546                 }
1547 #ifdef SHIFTJIS_CP932
1548                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1549                     cp932inv_f = TRUE;
1550                     continue;
1551                 }
1552 #endif
1553
1554 #ifdef X0212_ENABLE
1555                 if (strcmp(long_option[i].name, "x0212") == 0){
1556                     x0212_f = TRUE;
1557                     continue;
1558                 }
1559 #endif
1560
1561 #ifdef EXEC_IO
1562                   if (strcmp(long_option[i].name, "exec-in") == 0){
1563                       exec_f = 1;
1564                       return;
1565                   }
1566                   if (strcmp(long_option[i].name, "exec-out") == 0){
1567                       exec_f = -1;
1568                       return;
1569                   }
1570 #endif
1571 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1572                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1573                     no_cp932ext_f = TRUE;
1574                     continue;
1575                 }
1576                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1577                     no_best_fit_chars_f = TRUE;
1578                     continue;
1579                 }
1580                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1581                     encode_fallback = NULL;
1582                     continue;
1583                 }
1584                 if (strcmp(long_option[i].name, "fb-html") == 0){
1585                     encode_fallback = encode_fallback_html;
1586                     continue;
1587                 }
1588                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1589                     encode_fallback = encode_fallback_xml;
1590                     continue;
1591                 }
1592                 if (strcmp(long_option[i].name, "fb-java") == 0){
1593                     encode_fallback = encode_fallback_java;
1594                     continue;
1595                 }
1596                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1597                     encode_fallback = encode_fallback_perl;
1598                     continue;
1599                 }
1600                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1601                     encode_fallback = encode_fallback_subchar;
1602                     continue;
1603                 }
1604                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1605                     encode_fallback = encode_fallback_subchar;
1606                     unicode_subchar = 0;
1607                     if (p[0] != '0'){
1608                         /* decimal number */
1609                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1610                             unicode_subchar *= 10;
1611                             unicode_subchar += hex2bin(p[i]);
1612                         }
1613                     }else if(p[1] == 'x' || p[1] == 'X'){
1614                         /* hexadecimal number */
1615                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1616                             unicode_subchar <<= 4;
1617                             unicode_subchar |= hex2bin(p[i]);
1618                         }
1619                     }else{
1620                         /* octal number */
1621                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1622                             unicode_subchar *= 8;
1623                             unicode_subchar += hex2bin(p[i]);
1624                         }
1625                     }
1626                     w16e_conv(unicode_subchar, &i, &j);
1627                     unicode_subchar = i<<8 | j;
1628                     continue;
1629                 }
1630 #endif
1631 #ifdef UTF8_OUTPUT_ENABLE
1632                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1633                     ms_ucs_map_f = UCS_MAP_MS;
1634                     continue;
1635                 }
1636 #endif
1637 #ifdef UNICODE_NORMALIZATION
1638                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1639                     input_f = UTF8_INPUT;
1640                     nfc_f = TRUE;
1641                     continue;
1642                 }
1643 #endif
1644                 if (strcmp(long_option[i].name, "prefix=") == 0){
1645                     if (nkf_isgraph(p[0])){
1646                         for (i = 1; nkf_isgraph(p[i]); i++){
1647                             prefix_table[p[i]] = p[0];
1648                         }
1649                     }
1650                     continue;
1651                 }
1652             }
1653             continue;
1654         case 'b':           /* buffered mode */
1655             unbuf_f = FALSE;
1656             continue;
1657         case 'u':           /* non bufferd mode */
1658             unbuf_f = TRUE;
1659             continue;
1660         case 't':           /* transparent mode */
1661             if (*cp=='1') {
1662                 /* alias of -t */
1663                 nop_f = TRUE;
1664                 *cp++;
1665             } else if (*cp=='2') {
1666                 /*
1667                  * -t with put/get
1668                  *
1669                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1670                  *
1671                  */
1672                 nop_f = 2;
1673                 *cp++;
1674             } else
1675                 nop_f = TRUE;
1676             continue;
1677         case 'j':           /* JIS output */
1678         case 'n':
1679             output_conv = j_oconv;
1680             continue;
1681         case 'e':           /* AT&T EUC output */
1682             output_conv = e_oconv;
1683             cp932inv_f = FALSE;
1684             continue;
1685         case 's':           /* SJIS output */
1686             output_conv = s_oconv;
1687             continue;
1688         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1689             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1690             input_f = LATIN1_INPUT;
1691             continue;
1692         case 'i':           /* Kanji IN ESC-$-@/B */
1693             if (*cp=='@'||*cp=='B')
1694                 kanji_intro = *cp++;
1695             continue;
1696         case 'o':           /* ASCII IN ESC-(-J/B */
1697             if (*cp=='J'||*cp=='B'||*cp=='H')
1698                 ascii_intro = *cp++;
1699             continue;
1700         case 'h':
1701             /*
1702                 bit:1   katakana->hiragana
1703                 bit:2   hiragana->katakana
1704             */
1705             if ('9'>= *cp && *cp>='0')
1706                 hira_f |= (*cp++ -'0');
1707             else
1708                 hira_f |= 1;
1709             continue;
1710         case 'r':
1711             rot_f = TRUE;
1712             continue;
1713 #if defined(MSDOS) || defined(__OS2__)
1714         case 'T':
1715             binmode_f = FALSE;
1716             continue;
1717 #endif
1718 #ifndef PERL_XS
1719         case 'V':
1720             show_configuration();
1721             exit(1);
1722             break;
1723         case 'v':
1724             usage();
1725             exit(1);
1726             break;
1727 #endif
1728 #ifdef UTF8_OUTPUT_ENABLE
1729         case 'w':           /* UTF-8 output */
1730             if (cp[0] == '8') {
1731                 output_conv = w_oconv; cp++;
1732                 if (cp[0] == '0'){
1733                     cp++;
1734                 } else {
1735                     output_bom_f = TRUE;
1736                 }
1737             } else {
1738                 if ('1'== cp[0] && '6'==cp[1]) {
1739                     output_conv = w_oconv16; cp+=2;
1740                 } else if ('3'== cp[0] && '2'==cp[1]) {
1741                     output_conv = w_oconv32; cp+=2;
1742                 } else {
1743                     output_conv = w_oconv;
1744                     continue;
1745                 }
1746                 if (cp[0]=='L') {
1747                     cp++;
1748                     output_endian = ENDIAN_LITTLE;
1749                 } else if (cp[0] == 'B') {
1750                     cp++;
1751                 } else {
1752                     continue;
1753                 }
1754                 if (cp[0] == '0'){
1755                     cp++;
1756                 } else {
1757                     output_bom_f = TRUE;
1758                 }
1759             }
1760             continue;
1761 #endif
1762 #ifdef UTF8_INPUT_ENABLE
1763         case 'W':           /* UTF input */
1764             if (cp[0] == '8') {
1765                 cp++;
1766                 input_f = UTF8_INPUT;
1767             }else{
1768                 if ('1'== cp[0] && '6'==cp[1]) {
1769                     cp += 2;
1770                     input_f = UTF16_INPUT;
1771                     input_endian = ENDIAN_BIG;
1772                 } else if ('3'== cp[0] && '2'==cp[1]) {
1773                     cp += 2;
1774                     input_f = UTF32_INPUT;
1775                     input_endian = ENDIAN_BIG;
1776                 } else {
1777                     input_f = UTF8_INPUT;
1778                     continue;
1779                 }
1780                 if (cp[0]=='L') {
1781                     cp++;
1782                     input_endian = ENDIAN_LITTLE;
1783                 } else if (cp[0] == 'B') {
1784                     cp++;
1785                 }
1786             }
1787             continue;
1788 #endif
1789         /* Input code assumption */
1790         case 'J':   /* JIS input */
1791             input_f = JIS_INPUT;
1792             continue;
1793         case 'E':   /* AT&T EUC input */
1794             input_f = EUC_INPUT;
1795             continue;
1796         case 'S':   /* MS Kanji input */
1797             input_f = SJIS_INPUT;
1798             continue;
1799         case 'Z':   /* Convert X0208 alphabet to asii */
1800             /* alpha_f
1801                bit:0   Convert JIS X 0208 Alphabet to ASCII
1802                bit:1   Convert Kankaku to one space
1803                bit:2   Convert Kankaku to two spaces
1804                bit:3   Convert HTML Entity
1805                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1806             */
1807             while ('0'<= *cp && *cp <='9') {
1808                 alpha_f |= 1 << (*cp++ - '0');
1809             }
1810             if (!alpha_f) alpha_f = 1;
1811             continue;
1812         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1813             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1814             /* accept  X0201
1815                     ESC-(-I     in JIS, EUC, MS Kanji
1816                     SI/SO       in JIS, EUC, MS Kanji
1817                     SSO         in EUC, JIS, not in MS Kanji
1818                     MS Kanji (0xa0-0xdf)
1819                output  X0201
1820                     ESC-(-I     in JIS (0x20-0x5f)
1821                     SSO         in EUC (0xa0-0xdf)
1822                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1823             */
1824             continue;
1825         case 'X':   /* Convert X0201 kana to X0208 */
1826             x0201_f = TRUE;
1827             continue;
1828         case 'F':   /* prserve new lines */
1829             fold_preserve_f = TRUE;
1830         case 'f':   /* folding -f60 or -f */
1831             fold_f = TRUE;
1832             fold_len = 0;
1833             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1834                 fold_len *= 10;
1835                 fold_len += *cp++ - '0';
1836             }
1837             if (!(0<fold_len && fold_len<BUFSIZ))
1838                 fold_len = DEFAULT_FOLD;
1839             if (*cp=='-') {
1840                 fold_margin = 0;
1841                 cp++;
1842                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1843                     fold_margin *= 10;
1844                     fold_margin += *cp++ - '0';
1845                 }
1846             }
1847             continue;
1848         case 'm':   /* MIME support */
1849             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1850             if (*cp=='B'||*cp=='Q') {
1851                 mime_decode_mode = *cp++;
1852                 mimebuf_f = FIXED_MIME;
1853             } else if (*cp=='N') {
1854                 mime_f = TRUE; cp++;
1855             } else if (*cp=='S') {
1856                 mime_f = STRICT_MIME; cp++;
1857             } else if (*cp=='0') {
1858                 mime_decode_f = FALSE;
1859                 mime_f = FALSE; cp++;
1860             }
1861             continue;
1862         case 'M':   /* MIME output */
1863             if (*cp=='B') {
1864                 mimeout_mode = 'B';
1865                 mimeout_f = FIXED_MIME; cp++;
1866             } else if (*cp=='Q') {
1867                 mimeout_mode = 'Q';
1868                 mimeout_f = FIXED_MIME; cp++;
1869             } else {
1870                 mimeout_f = TRUE;
1871             }
1872             continue;
1873         case 'B':   /* Broken JIS support */
1874             /*  bit:0   no ESC JIS
1875                 bit:1   allow any x on ESC-(-x or ESC-$-x
1876                 bit:2   reset to ascii on NL
1877             */
1878             if ('9'>= *cp && *cp>='0')
1879                 broken_f |= 1<<(*cp++ -'0');
1880             else
1881                 broken_f |= TRUE;
1882             continue;
1883 #ifndef PERL_XS
1884         case 'O':/* for Output file */
1885             file_out_f = TRUE;
1886             continue;
1887 #endif
1888         case 'c':/* add cr code */
1889             nlmode_f = CRLF;
1890             continue;
1891         case 'd':/* delete cr code */
1892             nlmode_f = LF;
1893             continue;
1894         case 'I':   /* ISO-2022-JP output */
1895             iso2022jp_f = TRUE;
1896             continue;
1897         case 'L':  /* line mode */
1898             if (*cp=='u') {         /* unix */
1899                 nlmode_f = LF; cp++;
1900             } else if (*cp=='m') { /* mac */
1901                 nlmode_f = CR; cp++;
1902             } else if (*cp=='w') { /* windows */
1903                 nlmode_f = CRLF; cp++;
1904             } else if (*cp=='0') { /* no conversion  */
1905                 nlmode_f = 0; cp++;
1906             }
1907             continue;
1908 #ifndef PERL_XS
1909         case 'g':
1910             if (*cp == '1') {
1911                 guess_f = 2;
1912                 cp++;
1913             } else if (*cp == '0') {
1914                 guess_f = 1;
1915                 cp++;
1916             } else {
1917                 guess_f = 1;
1918             }
1919             continue;
1920 #endif
1921         case SP:
1922         /* module muliple options in a string are allowed for Perl moudle  */
1923             while(*cp && *cp++!='-');
1924             continue;
1925         default:
1926             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1927             /* bogus option but ignored */
1928             continue;
1929         }
1930     }
1931 }
1932
1933 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1934 {
1935     if (iconv_func){
1936         struct input_code *p = input_code_list;
1937         while (p->name){
1938             if (iconv_func == p->iconv_func){
1939                 return p;
1940             }
1941             p++;
1942         }
1943     }
1944     return 0;
1945 }
1946
1947 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1948 {
1949 #ifdef INPUT_CODE_FIX
1950     if (f || !input_f)
1951 #endif
1952         if (estab_f != f){
1953             estab_f = f;
1954         }
1955
1956     if (iconv_func
1957 #ifdef INPUT_CODE_FIX
1958         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1959 #endif
1960         ){
1961         iconv = iconv_func;
1962     }
1963 #ifdef CHECK_OPTION
1964     if (estab_f && iconv_for_check != iconv){
1965         struct input_code *p = find_inputcode_byfunc(iconv);
1966         if (p){
1967             set_input_codename(p->name);
1968             debug(p->name);
1969         }
1970         iconv_for_check = iconv;
1971     }
1972 #endif
1973 }
1974
/*
 * Flag bits that set_code_score() ORs into input_code.score.
 * Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* nonexistent characters */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score lookup tables, indexed by the low nibble of the first byte.
 * code_score() uses score_table_A0 when (c2 & 0x70) == 0x20 and
 * score_table_F0 when (c2 & 0x70) == 0x70.
 *
 * The element type is unsigned char on purpose: SCORE_ERROR is 128, which
 * does not fit in a signed char and would otherwise turn into a negative
 * value that sets every high bit of the score when OR-ed in.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1999
2000 void set_code_score(struct input_code *ptr, nkf_char score)
2001 {
2002     if (ptr){
2003         ptr->score |= score;
2004     }
2005 }
2006
2007 void clr_code_score(struct input_code *ptr, nkf_char score)
2008 {
2009     if (ptr){
2010         ptr->score &= ~score;
2011     }
2012 }
2013
2014 void code_score(struct input_code *ptr)
2015 {
2016     nkf_char c2 = ptr->buf[0];
2017 #ifdef UTF8_OUTPUT_ENABLE
2018     nkf_char c1 = ptr->buf[1];
2019 #endif
2020     if (c2 < 0){
2021         set_code_score(ptr, SCORE_ERROR);
2022     }else if (c2 == SSO){
2023         set_code_score(ptr, SCORE_KANA);
2024     }else if (c2 == 0x8f){
2025         set_code_score(ptr, SCORE_X0212);
2026 #ifdef UTF8_OUTPUT_ENABLE
2027     }else if (!e2w_conv(c2, c1)){
2028         set_code_score(ptr, SCORE_NO_EXIST);
2029 #endif
2030     }else if ((c2 & 0x70) == 0x20){
2031         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2032     }else if ((c2 & 0x70) == 0x70){
2033         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2034     }else if ((c2 & 0x70) >= 0x50){
2035         set_code_score(ptr, SCORE_L2);
2036     }
2037 }
2038
2039 void status_disable(struct input_code *ptr)
2040 {
2041     ptr->stat = -1;
2042     ptr->buf[0] = -1;
2043     code_score(ptr);
2044     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2045 }
2046
2047 void status_push_ch(struct input_code *ptr, nkf_char c)
2048 {
2049     ptr->buf[ptr->index++] = c;
2050 }
2051
2052 void status_clear(struct input_code *ptr)
2053 {
2054     ptr->stat = 0;
2055     ptr->index = 0;
2056 }
2057
2058 void status_reset(struct input_code *ptr)
2059 {
2060     status_clear(ptr);
2061     ptr->score = SCORE_INIT;
2062 }
2063
2064 void status_reinit(struct input_code *ptr)
2065 {
2066     status_reset(ptr);
2067     ptr->_file_stat = 0;
2068 }
2069
2070 void status_check(struct input_code *ptr, nkf_char c)
2071 {
2072     if (c <= DEL && estab_f){
2073         status_reset(ptr);
2074     }
2075 }
2076
2077 void s_status(struct input_code *ptr, nkf_char c)
2078 {
2079     switch(ptr->stat){
2080       case -1:
2081           status_check(ptr, c);
2082           break;
2083       case 0:
2084           if (c <= DEL){
2085               break;
2086 #ifdef NUMCHAR_OPTION
2087           }else if (is_unicode_capsule(c)){
2088               break;
2089 #endif
2090           }else if (0xa1 <= c && c <= 0xdf){
2091               status_push_ch(ptr, SSO);
2092               status_push_ch(ptr, c);
2093               code_score(ptr);
2094               status_clear(ptr);
2095           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2096               ptr->stat = 1;
2097               status_push_ch(ptr, c);
2098           }else if (0xed <= c && c <= 0xee){
2099               ptr->stat = 3;
2100               status_push_ch(ptr, c);
2101 #ifdef SHIFTJIS_CP932
2102           }else if (is_ibmext_in_sjis(c)){
2103               ptr->stat = 2;
2104               status_push_ch(ptr, c);
2105 #endif /* SHIFTJIS_CP932 */
2106 #ifdef X0212_ENABLE
2107           }else if (0xf0 <= c && c <= 0xfc){
2108               ptr->stat = 1;
2109               status_push_ch(ptr, c);
2110 #endif /* X0212_ENABLE */
2111           }else{
2112               status_disable(ptr);
2113           }
2114           break;
2115       case 1:
2116           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2117               status_push_ch(ptr, c);
2118               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2119               code_score(ptr);
2120               status_clear(ptr);
2121           }else{
2122               status_disable(ptr);
2123           }
2124           break;
2125       case 2:
2126 #ifdef SHIFTJIS_CP932
2127         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2128             status_push_ch(ptr, c);
2129             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2130                 set_code_score(ptr, SCORE_CP932);
2131                 status_clear(ptr);
2132                 break;
2133             }
2134         }
2135 #endif /* SHIFTJIS_CP932 */
2136         status_disable(ptr);
2137           break;
2138       case 3:
2139           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2140               status_push_ch(ptr, c);
2141               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2142             set_code_score(ptr, SCORE_CP932);
2143             status_clear(ptr);
2144           }else{
2145               status_disable(ptr);
2146           }
2147           break;
2148     }
2149 }
2150
2151 void e_status(struct input_code *ptr, nkf_char c)
2152 {
2153     switch (ptr->stat){
2154       case -1:
2155           status_check(ptr, c);
2156           break;
2157       case 0:
2158           if (c <= DEL){
2159               break;
2160 #ifdef NUMCHAR_OPTION
2161           }else if (is_unicode_capsule(c)){
2162               break;
2163 #endif
2164           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2165               ptr->stat = 1;
2166               status_push_ch(ptr, c);
2167 #ifdef X0212_ENABLE
2168           }else if (0x8f == c){
2169               ptr->stat = 2;
2170               status_push_ch(ptr, c);
2171 #endif /* X0212_ENABLE */
2172           }else{
2173               status_disable(ptr);
2174           }
2175           break;
2176       case 1:
2177           if (0xa1 <= c && c <= 0xfe){
2178               status_push_ch(ptr, c);
2179               code_score(ptr);
2180               status_clear(ptr);
2181           }else{
2182               status_disable(ptr);
2183           }
2184           break;
2185 #ifdef X0212_ENABLE
2186       case 2:
2187           if (0xa1 <= c && c <= 0xfe){
2188               ptr->stat = 1;
2189               status_push_ch(ptr, c);
2190           }else{
2191               status_disable(ptr);
2192           }
2193 #endif /* X0212_ENABLE */
2194     }
2195 }
2196
2197 #ifdef UTF8_INPUT_ENABLE
2198 void w_status(struct input_code *ptr, nkf_char c)
2199 {
2200     switch (ptr->stat){
2201       case -1:
2202           status_check(ptr, c);
2203           break;
2204       case 0:
2205           if (c <= DEL){
2206               break;
2207 #ifdef NUMCHAR_OPTION
2208           }else if (is_unicode_capsule(c)){
2209               break;
2210 #endif
2211           }else if (0xc0 <= c && c <= 0xdf){
2212               ptr->stat = 1;
2213               status_push_ch(ptr, c);
2214           }else if (0xe0 <= c && c <= 0xef){
2215               ptr->stat = 2;
2216               status_push_ch(ptr, c);
2217           }else if (0xf0 <= c && c <= 0xf4){
2218               ptr->stat = 3;
2219               status_push_ch(ptr, c);
2220           }else{
2221               status_disable(ptr);
2222           }
2223           break;
2224       case 1:
2225       case 2:
2226           if (0x80 <= c && c <= 0xbf){
2227               status_push_ch(ptr, c);
2228               if (ptr->index > ptr->stat){
2229                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2230                              && ptr->buf[2] == 0xbf);
2231                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2232                            &ptr->buf[0], &ptr->buf[1]);
2233                   if (!bom){
2234                       code_score(ptr);
2235                   }
2236                   status_clear(ptr);
2237               }
2238           }else{
2239               status_disable(ptr);
2240           }
2241           break;
2242       case 3:
2243         if (0x80 <= c && c <= 0xbf){
2244             if (ptr->index < ptr->stat){
2245                 status_push_ch(ptr, c);
2246             } else {
2247                 status_clear(ptr);
2248             }
2249           }else{
2250               status_disable(ptr);
2251           }
2252           break;
2253     }
2254 }
2255 #endif
2256
2257 void code_status(nkf_char c)
2258 {
2259     int action_flag = 1;
2260     struct input_code *result = 0;
2261     struct input_code *p = input_code_list;
2262     while (p->name){
2263         if (!p->status_func) {
2264             ++p;
2265             continue;
2266         }
2267         if (!p->status_func)
2268             continue;
2269         (p->status_func)(p, c);
2270         if (p->stat > 0){
2271             action_flag = 0;
2272         }else if(p->stat == 0){
2273             if (result){
2274                 action_flag = 0;
2275             }else{
2276                 result = p;
2277             }
2278         }
2279         ++p;
2280     }
2281
2282     if (action_flag){
2283         if (result && !estab_f){
2284             set_iconv(TRUE, result->iconv_func);
2285         }else if (c <= DEL){
2286             struct input_code *ptr = input_code_list;
2287             while (ptr->name){
2288                 status_reset(ptr);
2289                 ++ptr;
2290             }
2291         }
2292     }
2293 }
2294
2295 #ifndef WIN32DLL
2296 nkf_char std_getc(FILE *f)
2297 {
2298     if (std_gc_ndx){
2299         return std_gc_buf[--std_gc_ndx];
2300     }
2301     return getc(f);
2302 }
2303 #endif /*WIN32DLL*/
2304
2305 nkf_char std_ungetc(nkf_char c, FILE *f)
2306 {
2307     if (std_gc_ndx == STD_GC_BUFSIZE){
2308         return EOF;
2309     }
2310     std_gc_buf[std_gc_ndx++] = c;
2311     return c;
2312 }
2313
2314 #ifndef WIN32DLL
2315 void std_putc(nkf_char c)
2316 {
2317     if(c!=EOF)
2318       putchar(c);
2319 }
2320 #endif /*WIN32DLL*/
2321
2322 #if !defined(PERL_XS) && !defined(WIN32DLL)
2323 nkf_char noconvert(FILE *f)
2324 {
2325     nkf_char    c;
2326
2327     if (nop_f == 2)
2328         module_connection();
2329     while ((c = (*i_getc)(f)) != EOF)
2330       (*o_putc)(c);
2331     (*o_putc)(EOF);
2332     return 1;
2333 }
2334 #endif
2335
2336 void module_connection(void)
2337 {
2338     oconv = output_conv;
2339     o_putc = std_putc;
2340
2341     /* replace continucation module, from output side */
2342
2343     /* output redicrection */
2344 #ifdef CHECK_OPTION
2345     if (noout_f || guess_f){
2346         o_putc = no_putc;
2347     }
2348 #endif
2349     if (mimeout_f) {
2350         o_mputc = o_putc;
2351         o_putc = mime_putc;
2352         if (mimeout_f == TRUE) {
2353             o_base64conv = oconv; oconv = base64_conv;
2354         }
2355         /* base64_count = 0; */
2356     }
2357
2358     if (nlmode_f || guess_f) {
2359         o_nlconv = oconv; oconv = nl_conv;
2360     }
2361     if (rot_f) {
2362         o_rot_conv = oconv; oconv = rot_conv;
2363     }
2364     if (iso2022jp_f) {
2365         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2366     }
2367     if (hira_f) {
2368         o_hira_conv = oconv; oconv = hira_conv;
2369     }
2370     if (fold_f) {
2371         o_fconv = oconv; oconv = fold_conv;
2372         f_line = 0;
2373     }
2374     if (alpha_f || x0201_f) {
2375         o_zconv = oconv; oconv = z_conv;
2376     }
2377
2378     i_getc = std_getc;
2379     i_ungetc = std_ungetc;
2380     /* input redicrection */
2381 #ifdef INPUT_OPTION
2382     if (cap_f){
2383         i_cgetc = i_getc; i_getc = cap_getc;
2384         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2385     }
2386     if (url_f){
2387         i_ugetc = i_getc; i_getc = url_getc;
2388         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2389     }
2390 #endif
2391 #ifdef NUMCHAR_OPTION
2392     if (numchar_f){
2393         i_ngetc = i_getc; i_getc = numchar_getc;
2394         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2395     }
2396 #endif
2397 #ifdef UNICODE_NORMALIZATION
2398     if (nfc_f && input_f == UTF8_INPUT){
2399         i_nfc_getc = i_getc; i_getc = nfc_getc;
2400         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2401     }
2402 #endif
2403     if (mime_f && mimebuf_f==FIXED_MIME) {
2404         i_mgetc = i_getc; i_getc = mime_getc;
2405         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2406     }
2407     if (broken_f & 1) {
2408         i_bgetc = i_getc; i_getc = broken_getc;
2409         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2410     }
2411     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2412         set_iconv(-TRUE, e_iconv);
2413     } else if (input_f == SJIS_INPUT) {
2414         set_iconv(-TRUE, s_iconv);
2415 #ifdef UTF8_INPUT_ENABLE
2416     } else if (input_f == UTF8_INPUT) {
2417         set_iconv(-TRUE, w_iconv);
2418     } else if (input_f == UTF16_INPUT) {
2419         set_iconv(-TRUE, w_iconv16);
2420     } else if (input_f == UTF32_INPUT) {
2421         set_iconv(-TRUE, w_iconv32);
2422 #endif
2423     } else {
2424         set_iconv(FALSE, e_iconv);
2425     }
2426
2427     {
2428         struct input_code *p = input_code_list;
2429         while (p->name){
2430             status_reinit(p++);
2431         }
2432     }
2433 }
2434
2435 /*
2436  * Check and Ignore BOM
2437  */
2438 void check_bom(FILE *f)
2439 {
2440     int c2;
2441     switch(c2 = (*i_getc)(f)){
2442     case 0x00:
2443         if((c2 = (*i_getc)(f)) == 0x00){
2444             if((c2 = (*i_getc)(f)) == 0xFE){
2445                 if((c2 = (*i_getc)(f)) == 0xFF){
2446                     if(!input_f){
2447                         set_iconv(TRUE, w_iconv32);
2448                     }
2449                     if (iconv == w_iconv32) {
2450                         input_endian = ENDIAN_BIG;
2451                         return;
2452                     }
2453                     (*i_ungetc)(0xFF,f);
2454                 }else (*i_ungetc)(c2,f);
2455                 (*i_ungetc)(0xFE,f);
2456             }else if(c2 == 0xFF){
2457                 if((c2 = (*i_getc)(f)) == 0xFE){
2458                     if(!input_f){
2459                         set_iconv(TRUE, w_iconv32);
2460                     }
2461                     if (iconv == w_iconv32) {
2462                         input_endian = ENDIAN_2143;
2463                         return;
2464                     }
2465                     (*i_ungetc)(0xFF,f);
2466                 }else (*i_ungetc)(c2,f);
2467                 (*i_ungetc)(0xFF,f);
2468             }else (*i_ungetc)(c2,f);
2469             (*i_ungetc)(0x00,f);
2470         }else (*i_ungetc)(c2,f);
2471         (*i_ungetc)(0x00,f);
2472         break;
2473     case 0xEF:
2474         if((c2 = (*i_getc)(f)) == 0xBB){
2475             if((c2 = (*i_getc)(f)) == 0xBF){
2476                 if(!input_f){
2477                     set_iconv(TRUE, w_iconv);
2478                 }
2479                 if (iconv == w_iconv) {
2480                     return;
2481                 }
2482                 (*i_ungetc)(0xBF,f);
2483             }else (*i_ungetc)(c2,f);
2484             (*i_ungetc)(0xBB,f);
2485         }else (*i_ungetc)(c2,f);
2486         (*i_ungetc)(0xEF,f);
2487         break;
2488     case 0xFE:
2489         if((c2 = (*i_getc)(f)) == 0xFF){
2490             if((c2 = (*i_getc)(f)) == 0x00){
2491                 if((c2 = (*i_getc)(f)) == 0x00){
2492                     if(!input_f){
2493                         set_iconv(TRUE, w_iconv32);
2494                     }
2495                     if (iconv == w_iconv32) {
2496                         input_endian = ENDIAN_3412;
2497                         return;
2498                     }
2499                     (*i_ungetc)(0x00,f);
2500                 }else (*i_ungetc)(c2,f);
2501                 (*i_ungetc)(0x00,f);
2502             }else (*i_ungetc)(c2,f);
2503             if(!input_f){
2504                 set_iconv(TRUE, w_iconv16);
2505             }
2506             if (iconv == w_iconv16) {
2507                 input_endian = ENDIAN_BIG;
2508                 return;
2509             }
2510             (*i_ungetc)(0xFF,f);
2511         }else (*i_ungetc)(c2,f);
2512         (*i_ungetc)(0xFE,f);
2513         break;
2514     case 0xFF:
2515         if((c2 = (*i_getc)(f)) == 0xFE){
2516             if((c2 = (*i_getc)(f)) == 0x00){
2517                 if((c2 = (*i_getc)(f)) == 0x00){
2518                     if(!input_f){
2519                         set_iconv(TRUE, w_iconv32);
2520                     }
2521                     if (iconv == w_iconv32) {
2522                         input_endian = ENDIAN_LITTLE;
2523                         return;
2524                     }
2525                     (*i_ungetc)(0x00,f);
2526                 }else (*i_ungetc)(c2,f);
2527                 (*i_ungetc)(0x00,f);
2528             }else (*i_ungetc)(c2,f);
2529             if(!input_f){
2530                 set_iconv(TRUE, w_iconv16);
2531             }
2532             if (iconv == w_iconv16) {
2533                 input_endian = ENDIAN_LITTLE;
2534                 return;
2535             }
2536             (*i_ungetc)(0xFE,f);
2537         }else (*i_ungetc)(c2,f);
2538         (*i_ungetc)(0xFF,f);
2539         break;
2540     default:
2541         (*i_ungetc)(c2,f);
2542         break;
2543     }
2544 }
2545
2546 /*
2547    Conversion main loop. Code detection only.
2548  */
2549
2550 nkf_char kanji_convert(FILE *f)
2551 {
2552     nkf_char    c3, c2=0, c1, c0=0;
2553     int is_8bit = FALSE;
2554
2555     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2556 #ifdef UTF8_INPUT_ENABLE
2557        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2558 #endif
2559       ){
2560         is_8bit = TRUE;
2561     }
2562
2563     input_mode = ASCII;
2564     output_mode = ASCII;
2565     shift_mode = FALSE;
2566
2567 #define NEXT continue      /* no output, get next */
2568 #define SEND ;             /* output c1 and c2, get next */
2569 #define LAST break         /* end of loop, go closing  */
2570
2571     module_connection();
2572     check_bom(f);
2573
2574     while ((c1 = (*i_getc)(f)) != EOF) {
2575 #ifdef INPUT_CODE_FIX
2576         if (!input_f)
2577 #endif
2578             code_status(c1);
2579         if (c2) {
2580             /* second byte */
2581             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2582                 /* in case of 8th bit is on */
2583                 if (!estab_f&&!mime_decode_mode) {
2584                     /* in case of not established yet */
2585                     /* It is still ambiguious */
2586                     if (h_conv(f, c2, c1)==EOF)
2587                         LAST;
2588                     else
2589                         c2 = 0;
2590                     NEXT;
2591                 } else {
2592                     /* in case of already established */
2593                     if (c1 < AT) {
2594                         /* ignore bogus code and not CP5022x UCD */
2595                         c2 = 0;
2596                         NEXT;
2597                     } else {
2598                         SEND;
2599                     }
2600                 }
2601             } else
2602                 /* second byte, 7 bit code */
2603                 /* it might be kanji shitfted */
2604                 if ((c1 == DEL) || (c1 <= SP)) {
2605                     /* ignore bogus first code */
2606                     c2 = 0;
2607                     NEXT;
2608                 } else
2609                     SEND;
2610         } else {
2611             /* first byte */
2612 #ifdef UTF8_INPUT_ENABLE
2613             if (iconv == w_iconv16) {
2614                 if (input_endian == ENDIAN_BIG) {
2615                     c2 = c1;
2616                     if ((c1 = (*i_getc)(f)) != EOF) {
2617                         if (0xD8 <= c2 && c2 <= 0xDB) {
2618                             if ((c0 = (*i_getc)(f)) != EOF) {
2619                                 c0 <<= 8;
2620                                 if ((c3 = (*i_getc)(f)) != EOF) {
2621                                     c0 |= c3;
2622                                 } else c2 = EOF;
2623                             } else c2 = EOF;
2624                         }
2625                     } else c2 = EOF;
2626                 } else {
2627                     if ((c2 = (*i_getc)(f)) != EOF) {
2628                         if (0xD8 <= c2 && c2 <= 0xDB) {
2629                             if ((c3 = (*i_getc)(f)) != EOF) {
2630                                 if ((c0 = (*i_getc)(f)) != EOF) {
2631                                     c0 <<= 8;
2632                                     c0 |= c3;
2633                                 } else c2 = EOF;
2634                             } else c2 = EOF;
2635                         }
2636                     } else c2 = EOF;
2637                 }
2638                 SEND;
2639             } else if(iconv == w_iconv32){
2640                 int c3 = c1;
2641                 if((c2 = (*i_getc)(f)) != EOF &&
2642                    (c1 = (*i_getc)(f)) != EOF &&
2643                    (c0 = (*i_getc)(f)) != EOF){
2644                     switch(input_endian){
2645                     case ENDIAN_BIG:
2646                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2647                         break;
2648                     case ENDIAN_LITTLE:
2649                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2650                         break;
2651                     case ENDIAN_2143:
2652                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2653                         break;
2654                     case ENDIAN_3412:
2655                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2656                         break;
2657                     }
2658                     c2 = 0;
2659                 }else{
2660                     c2 = EOF;
2661                 }
2662                 SEND;
2663             } else
2664 #endif
2665 #ifdef NUMCHAR_OPTION
2666             if (is_unicode_capsule(c1)){
2667                 SEND;
2668             } else
2669 #endif
2670             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2671                 /* 8 bit code */
2672                 if (!estab_f && !iso8859_f) {
2673                     /* not established yet */
2674                     c2 = c1;
2675                     NEXT;
2676                 } else { /* estab_f==TRUE */
2677                     if (iso8859_f) {
2678                         c2 = ISO8859_1;
2679                         c1 &= 0x7f;
2680                         SEND;
2681                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2682                         /* SJIS X0201 Case... */
2683                         if (iso2022jp_f && !x0201_f) {
2684                             (*oconv)(GETA1, GETA2);
2685                             NEXT;
2686                         } else {
2687                             c2 = X0201;
2688                             c1 &= 0x7f;
2689                             SEND;
2690                         }
2691                     } else if (c1==SSO && iconv != s_iconv) {
2692                         /* EUC X0201 Case */
2693                         c1 = (*i_getc)(f);  /* skip SSO */
2694                         code_status(c1);
2695                         if (SSP<=c1 && c1<0xe0) {
2696                             if (iso2022jp_f && !x0201_f) {
2697                                 (*oconv)(GETA1, GETA2);
2698                                 NEXT;
2699                             } else {
2700                                 c2 = X0201;
2701                                 c1 &= 0x7f;
2702                                 SEND;
2703                             }
2704                         } else  { /* bogus code, skip SSO and one byte */
2705                             NEXT;
2706                         }
2707                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2708                                (c1 == 0xFD || c1 == 0xFE)) {
2709                         /* CP10001 */
2710                         c2 = X0201;
2711                         c1 &= 0x7f;
2712                         SEND;
2713                     } else {
2714                        /* already established */
2715                        c2 = c1;
2716                        NEXT;
2717                     }
2718                 }
2719             } else if ((c1 > SP) && (c1 != DEL)) {
2720                 /* in case of Roman characters */
2721                 if (shift_mode) {
2722                     /* output 1 shifted byte */
2723                     if (iso8859_f) {
2724                         c2 = ISO8859_1;
2725                         SEND;
2726                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2727                       /* output 1 shifted byte */
2728                         if (iso2022jp_f && !x0201_f) {
2729                             (*oconv)(GETA1, GETA2);
2730                             NEXT;
2731                         } else {
2732                             c2 = X0201;
2733                             SEND;
2734                         }
2735                     } else {
2736                         /* look like bogus code */
2737                         NEXT;
2738                     }
2739                 } else if (input_mode == X0208 || input_mode == X0212 ||
2740                            input_mode == X0213_1 || input_mode == X0213_2) {
2741                     /* in case of Kanji shifted */
2742                     c2 = c1;
2743                     NEXT;
2744                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2745                     /* Check MIME code */
2746                     if ((c1 = (*i_getc)(f)) == EOF) {
2747                         (*oconv)(0, '=');
2748                         LAST;
2749                     } else if (c1 == '?') {
2750                         /* =? is mime conversion start sequence */
2751                         if(mime_f == STRICT_MIME) {
2752                             /* check in real detail */
2753                             if (mime_begin_strict(f) == EOF)
2754                                 LAST;
2755                             else
2756                                 NEXT;
2757                         } else if (mime_begin(f) == EOF)
2758                             LAST;
2759                         else
2760                             NEXT;
2761                     } else {
2762                         (*oconv)(0, '=');
2763                         (*i_ungetc)(c1,f);
2764                         NEXT;
2765                     }
2766                 } else {
2767                     /* normal ASCII code */
2768                     SEND;
2769                 }
2770             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2771                 shift_mode = FALSE;
2772                 NEXT;
2773             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2774                 shift_mode = TRUE;
2775                 NEXT;
2776             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2777                 if ((c1 = (*i_getc)(f)) == EOF) {
2778                     /*  (*oconv)(0, ESC); don't send bogus code */
2779                     LAST;
2780                 } else if (c1 == '$') {
2781                     if ((c1 = (*i_getc)(f)) == EOF) {
2782                         /*
2783                         (*oconv)(0, ESC); don't send bogus code
2784                         (*oconv)(0, '$'); */
2785                         LAST;
2786                     } else if (c1 == '@'|| c1 == 'B') {
2787                         /* This is kanji introduction */
2788                         input_mode = X0208;
2789                         shift_mode = FALSE;
2790                         set_input_codename("ISO-2022-JP");
2791 #ifdef CHECK_OPTION
2792                         debug("ISO-2022-JP");
2793 #endif
2794                         NEXT;
2795                     } else if (c1 == '(') {
2796                         if ((c1 = (*i_getc)(f)) == EOF) {
2797                             /* don't send bogus code
2798                             (*oconv)(0, ESC);
2799                             (*oconv)(0, '$');
2800                             (*oconv)(0, '(');
2801                                 */
2802                             LAST;
2803                         } else if (c1 == '@'|| c1 == 'B') {
2804                             /* This is kanji introduction */
2805                             input_mode = X0208;
2806                             shift_mode = FALSE;
2807                             NEXT;
2808 #ifdef X0212_ENABLE
2809                         } else if (c1 == 'D'){
2810                             input_mode = X0212;
2811                             shift_mode = FALSE;
2812                             NEXT;
2813 #endif /* X0212_ENABLE */
2814                         } else if (c1 == (X0213_1&0x7F)){
2815                             input_mode = X0213_1;
2816                             shift_mode = FALSE;
2817                             NEXT;
2818                         } else if (c1 == (X0213_2&0x7F)){
2819                             input_mode = X0213_2;
2820                             shift_mode = FALSE;
2821                             NEXT;
2822                         } else {
2823                             /* could be some special code */
2824                             (*oconv)(0, ESC);
2825                             (*oconv)(0, '$');
2826                             (*oconv)(0, '(');
2827                             (*oconv)(0, c1);
2828                             NEXT;
2829                         }
2830                     } else if (broken_f&0x2) {
2831                         /* accept any ESC-(-x as broken code ... */
2832                         input_mode = X0208;
2833                         shift_mode = FALSE;
2834                         NEXT;
2835                     } else {
2836                         (*oconv)(0, ESC);
2837                         (*oconv)(0, '$');
2838                         (*oconv)(0, c1);
2839                         NEXT;
2840                     }
2841                 } else if (c1 == '(') {
2842                     if ((c1 = (*i_getc)(f)) == EOF) {
2843                         /* don't send bogus code
2844                         (*oconv)(0, ESC);
2845                         (*oconv)(0, '('); */
2846                         LAST;
2847                     } else {
2848                         if (c1 == 'I') {
2849                             /* This is X0201 kana introduction */
2850                             input_mode = X0201; shift_mode = X0201;
2851                             NEXT;
2852                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2853                             /* This is X0208 kanji introduction */
2854                             input_mode = ASCII; shift_mode = FALSE;
2855                             NEXT;
2856                         } else if (broken_f&0x2) {
2857                             input_mode = ASCII; shift_mode = FALSE;
2858                             NEXT;
2859                         } else {
2860                             (*oconv)(0, ESC);
2861                             (*oconv)(0, '(');
2862                             /* maintain various input_mode here */
2863                             SEND;
2864                         }
2865                     }
2866                } else if ( c1 == 'N' || c1 == 'n'){
2867                    /* SS2 */
2868                    c3 = (*i_getc)(f);  /* skip SS2 */
2869                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2870                        c1 = c3;
2871                        c2 = X0201;
2872                        SEND;
2873                    }else{
2874                        (*i_ungetc)(c3, f);
2875                        /* lonely ESC  */
2876                        (*oconv)(0, ESC);
2877                        SEND;
2878                    }
2879                 } else {
2880                     /* lonely ESC  */
2881                     (*oconv)(0, ESC);
2882                     SEND;
2883                 }
2884             } else if (c1 == ESC && iconv == s_iconv) {
2885                 /* ESC in Shift_JIS */
2886                 if ((c1 = (*i_getc)(f)) == EOF) {
2887                     /*  (*oconv)(0, ESC); don't send bogus code */
2888                     LAST;
2889                 } else if (c1 == '$') {
2890                     /* J-PHONE emoji */
2891                     if ((c1 = (*i_getc)(f)) == EOF) {
2892                         /*
2893                            (*oconv)(0, ESC); don't send bogus code
2894                            (*oconv)(0, '$'); */
2895                         LAST;
2896                     } else {
2897                         if (('E' <= c1 && c1 <= 'G') ||
2898                             ('O' <= c1 && c1 <= 'Q')) {
2899                             /*
2900                                NUM : 0 1 2 3 4 5
2901                                BYTE: G E F O P Q
2902                                C%7 : 1 6 0 2 3 4
2903                                C%7 : 0 1 2 3 4 5 6
2904                                NUM : 2 0 3 4 5 X 1
2905                              */
2906                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2907                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2908                             while ((c1 = (*i_getc)(f)) != EOF) {
2909                                 if (SP <= c1 && c1 <= 'z') {
2910                                     (*oconv)(0, c1 + c0);
2911                                 } else break; /* c1 == SO */
2912                             }
2913                         }
2914                     }
2915                     if (c1 == EOF) LAST;
2916                     NEXT;
2917                 } else {
2918                     /* lonely ESC  */
2919                     (*oconv)(0, ESC);
2920                     SEND;
2921                 }
2922             } else if (c1 == LF || c1 == CR) {
2923                 if (broken_f&4) {
2924                     input_mode = ASCII; set_iconv(FALSE, 0);
2925                     SEND;
2926                 } else if (mime_decode_f && !mime_decode_mode){
2927                     if (c1 == LF) {
2928                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2929                             i_ungetc(SP,f);
2930                             continue;
2931                         } else {
2932                             i_ungetc(c1,f);
2933                         }
2934                         c1 = LF;
2935                         SEND;
2936                     } else  { /* if (c1 == CR)*/
2937                         if ((c1=(*i_getc)(f))!=EOF) {
2938                             if (c1==SP) {
2939                                 i_ungetc(SP,f);
2940                                 continue;
2941                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2942                                 i_ungetc(SP,f);
2943                                 continue;
2944                             } else {
2945                                 i_ungetc(c1,f);
2946                             }
2947                             i_ungetc(LF,f);
2948                         } else {
2949                             i_ungetc(c1,f);
2950                         }
2951                         c1 = CR;
2952                         SEND;
2953                     }
2954                 }
2955             } else if (c1 == DEL && input_mode == X0208) {
2956                 /* CP5022x */
2957                 c2 = c1;
2958                 NEXT;
2959             } else
2960                 SEND;
2961         }
2962         /* send: */
2963         switch(input_mode){
2964         case ASCII:
2965             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2966             case -2:
2967                 /* 4 bytes UTF-8 */
2968                 if ((c0 = (*i_getc)(f)) != EOF) {
2969                     code_status(c0);
2970                     c0 <<= 8;
2971                     if ((c3 = (*i_getc)(f)) != EOF) {
2972                         code_status(c3);
2973                         (*iconv)(c2, c1, c0|c3);
2974                     }
2975                 }
2976                 break;
2977             case -1:
2978                 /* 3 bytes EUC or UTF-8 */
2979                 if ((c0 = (*i_getc)(f)) != EOF) {
2980                     code_status(c0);
2981                     (*iconv)(c2, c1, c0);
2982                 }
2983                 break;
2984             }
2985             break;
2986         case X0208:
2987         case X0213_1:
2988             if (ms_ucs_map_f &&
2989                 0x7F <= c2 && c2 <= 0x92 &&
2990                 0x21 <= c1 && c1 <= 0x7E) {
2991                 /* CP932 UDC */
2992                 if(c1 == 0x7F) return 0;
2993                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2994                 c2 = 0;
2995             }
2996             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2997             break;
2998 #ifdef X0212_ENABLE
2999         case X0212:
3000             (*oconv)(PREFIX_EUCG3 | c2, c1);
3001             break;
3002 #endif /* X0212_ENABLE */
3003         case X0213_2:
3004             (*oconv)(PREFIX_EUCG3 | c2, c1);
3005             break;
3006         default:
3007             (*oconv)(input_mode, c1);  /* other special case */
3008         }
3009
3010         c2 = 0;
3011         c0 = 0;
3012         continue;
3013         /* goto next_word */
3014     }
3015
3016     /* epilogue */
3017     (*iconv)(EOF, 0, 0);
3018     if (!input_codename)
3019     {
3020         if (is_8bit) {
3021             struct input_code *p = input_code_list;
3022             struct input_code *result = p;
3023             while (p->name){
3024                 if (p->score < result->score) result = p;
3025                 ++p;
3026             }
3027             set_input_codename(result->name);
3028 #ifdef CHECK_OPTION
3029             debug(result->name);
3030 #endif
3031         }
3032     }
3033     return 1;
3034 }
3035
3036 nkf_char
3037 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3038 {
3039     nkf_char ret, c3, c0;
3040     int hold_index;
3041
3042
3043     /** it must NOT be in the kanji shifte sequence      */
3044     /** it must NOT be written in JIS7                   */
3045     /** and it must be after 2 byte 8bit code            */
3046
3047     hold_count = 0;
3048     push_hold_buf(c2);
3049     push_hold_buf(c1);
3050
3051     while ((c1 = (*i_getc)(f)) != EOF) {
3052         if (c1 == ESC){
3053             (*i_ungetc)(c1,f);
3054             break;
3055         }
3056         code_status(c1);
3057         if (push_hold_buf(c1) == EOF || estab_f){
3058             break;
3059         }
3060     }
3061
3062     if (!estab_f){
3063         struct input_code *p = input_code_list;
3064         struct input_code *result = p;
3065         if (c1 == EOF){
3066             code_status(c1);
3067         }
3068         while (p->name){
3069             if (p->status_func && p->score < result->score){
3070                 result = p;
3071             }
3072             ++p;
3073         }
3074         set_iconv(TRUE, result->iconv_func);
3075     }
3076
3077
3078     /** now,
3079      ** 1) EOF is detected, or
3080      ** 2) Code is established, or
3081      ** 3) Buffer is FULL (but last word is pushed)
3082      **
3083      ** in 1) and 3) cases, we continue to use
3084      ** Kanji codes by oconv and leave estab_f unchanged.
3085      **/
3086
3087     ret = c1;
3088     hold_index = 0;
3089     while (hold_index < hold_count){
3090         c2 = hold_buf[hold_index++];
3091         if (c2 <= DEL
3092 #ifdef NUMCHAR_OPTION
3093             || is_unicode_capsule(c2)
3094 #endif
3095             ){
3096             (*iconv)(0, c2, 0);
3097             continue;
3098         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3099             (*iconv)(X0201, c2, 0);
3100             continue;
3101         }
3102         if (hold_index < hold_count){
3103             c1 = hold_buf[hold_index++];
3104         }else{
3105             c1 = (*i_getc)(f);
3106             if (c1 == EOF){
3107                 c3 = EOF;
3108                 break;
3109             }
3110             code_status(c1);
3111         }
3112         c0 = 0;
3113         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3114         case -2:
3115             /* 4 bytes UTF-8 */
3116             if (hold_index < hold_count){
3117                 c0 = hold_buf[hold_index++];
3118             } else if ((c0 = (*i_getc)(f)) == EOF) {
3119                 ret = EOF;
3120                 break;
3121             } else {
3122                 code_status(c0);
3123                 c0 <<= 8;
3124                 if (hold_index < hold_count){
3125                     c3 = hold_buf[hold_index++];
3126                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3127                     c0 = ret = EOF;
3128                     break;
3129                 } else {
3130                     code_status(c3);
3131                     (*iconv)(c2, c1, c0|c3);
3132                 }
3133             }
3134             break;
3135         case -1:
3136             /* 3 bytes EUC or UTF-8 */
3137             if (hold_index < hold_count){
3138                 c0 = hold_buf[hold_index++];
3139             } else if ((c0 = (*i_getc)(f)) == EOF) {
3140                 ret = EOF;
3141                 break;
3142             } else {
3143                 code_status(c0);
3144             }
3145             (*iconv)(c2, c1, c0);
3146             break;
3147         }
3148         if (c0 == EOF) break;
3149     }
3150     return ret;
3151 }
3152
3153 nkf_char push_hold_buf(nkf_char c2)
3154 {
3155     if (hold_count >= HOLD_SIZE*2)
3156         return (EOF);
3157     hold_buf[hold_count++] = (unsigned char)c2;
3158     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3159 }
3160
3161 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3162 {
3163 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3164     nkf_char val;
3165 #endif
3166     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3167 #ifdef SHIFTJIS_CP932
3168     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3169         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3170         if (val){
3171             c2 = val >> 8;
3172             c1 = val & 0xff;
3173         }
3174     }
3175     if (cp932inv_f
3176         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3177         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3178         if (c){
3179             c2 = c >> 8;
3180             c1 = c & 0xff;
3181         }
3182     }
3183 #endif /* SHIFTJIS_CP932 */
3184 #ifdef X0212_ENABLE
3185     if (!x0213_f && is_ibmext_in_sjis(c2)){
3186         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3187         if (val){
3188             if (val > 0x7FFF){
3189                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3190                 c1 = val & 0xff;
3191             }else{
3192                 c2 = val >> 8;
3193                 c1 = val & 0xff;
3194             }
3195             if (p2) *p2 = c2;
3196             if (p1) *p1 = c1;
3197             return 0;
3198         }
3199     }
3200 #endif
3201     if(c2 >= 0x80){
3202         if(x0213_f && c2 >= 0xF0){
3203             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3204                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3205             }else{ /* 78<=k<=94 */
3206                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3207                 if (0x9E < c1) c2++;
3208             }
3209         }else{
3210             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3211             if (0x9E < c1) c2++;
3212         }
3213         if (c1 < 0x9F)
3214             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3215         else {
3216             c1 = c1 - 0x7E;
3217         }
3218     }
3219
3220 #ifdef X0212_ENABLE
3221     c2 = x0212_unshift(c2);
3222 #endif
3223     if (p2) *p2 = c2;
3224     if (p1) *p1 = c1;
3225     return 0;
3226 }
3227
3228 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3229 {
3230     if (c2 == X0201) {
3231         c1 &= 0x7f;
3232     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3233         /* NOP */
3234     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3235         /* CP932 UDC */
3236         if(c1 == 0x7F) return 0;
3237         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3238         c2 = 0;
3239     } else {
3240         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3241         if (ret) return ret;
3242     }
3243     (*oconv)(c2, c1);
3244     return 0;
3245 }
3246
3247 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3248 {
3249     if (c2 == X0201) {
3250         c1 &= 0x7f;
3251 #ifdef X0212_ENABLE
3252     }else if (c2 == 0x8f){
3253         if (c0 == 0){
3254             return -1;
3255         }
3256         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3257             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3258             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3259             c2 = 0;
3260         } else {
3261             c2 = (c2 << 8) | (c1 & 0x7f);
3262             c1 = c0 & 0x7f;
3263 #ifdef SHIFTJIS_CP932
3264             if (cp51932_f){
3265                 nkf_char s2, s1;
3266                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3267                     s2e_conv(s2, s1, &c2, &c1);
3268                     if (c2 < 0x100){
3269                         c1 &= 0x7f;
3270                         c2 &= 0x7f;
3271                     }
3272                 }
3273             }
3274 #endif /* SHIFTJIS_CP932 */
3275         }
3276 #endif /* X0212_ENABLE */
3277     } else if (c2 == SSO){
3278         c2 = X0201;
3279         c1 &= 0x7f;
3280     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3281         /* NOP */
3282     } else {
3283         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3284             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3285             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3286             c2 = 0;
3287         } else {
3288             c1 &= 0x7f;
3289             c2 &= 0x7f;
3290 #ifdef SHIFTJIS_CP932
3291             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3292                 nkf_char s2, s1;
3293                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3294                     s2e_conv(s2, s1, &c2, &c1);
3295                     if (c2 < 0x100){
3296                         c1 &= 0x7f;
3297                         c2 &= 0x7f;
3298                     }
3299                 }
3300             }
3301 #endif /* SHIFTJIS_CP932 */
3302         }
3303     }
3304     (*oconv)(c2, c1);
3305     return 0;
3306 }
3307
3308 #ifdef UTF8_INPUT_ENABLE
3309 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3310 {
3311     nkf_char ret = 0;
3312
3313     if (!c1){
3314         *p2 = 0;
3315         *p1 = c2;
3316     }else if (0xc0 <= c2 && c2 <= 0xef) {
3317         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3318 #ifdef NUMCHAR_OPTION
3319         if (ret > 0){
3320             if (p2) *p2 = 0;
3321             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3322             ret = 0;
3323         }
3324 #endif
3325     }
3326     return ret;
3327 }
3328
3329 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3330 {
3331     nkf_char ret = 0;
3332     static const char w_iconv_utf8_1st_byte[] =
3333     { /* 0xC0 - 0xFF */
3334         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3335         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3336         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3337         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3338
3339     if (c2 < 0 || 0xff < c2) {
3340     }else if (c2 == 0) { /* 0 : 1 byte*/
3341         c0 = 0;
3342     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3343         return 0;
3344     } else{
3345         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3346         case 21:
3347             if (c1 < 0x80 || 0xBF < c1) return 0;
3348             break;
3349         case 30:
3350             if (c0 == 0) return -1;
3351             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3352                 return 0;
3353             break;
3354         case 31:
3355         case 33:
3356             if (c0 == 0) return -1;
3357             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3358                 return 0;
3359             break;
3360         case 32:
3361             if (c0 == 0) return -1;
3362             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3363                 return 0;
3364             break;
3365         case 40:
3366             if (c0 == 0) return -2;
3367             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3368                 return 0;
3369             break;
3370         case 41:
3371             if (c0 == 0) return -2;
3372             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3373                 return 0;
3374             break;
3375         case 42:
3376             if (c0 == 0) return -2;
3377             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3378                 return 0;
3379             break;
3380         default:
3381             return 0;
3382             break;
3383         }
3384     }
3385     if (c2 == 0 || c2 == EOF){
3386     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3387         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3388         c2 = 0;
3389     } else {
3390         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3391     }
3392     if (ret == 0){
3393         (*oconv)(c2, c1);
3394     }
3395     return ret;
3396 }
3397 #endif
3398
3399 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3400 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3401 {
3402     val &= VALUE_MASK;
3403     if (val < 0x80){
3404         *p2 = val;
3405         *p1 = 0;
3406         *p0 = 0;
3407     }else if (val < 0x800){
3408         *p2 = 0xc0 | (val >> 6);
3409         *p1 = 0x80 | (val & 0x3f);
3410         *p0 = 0;
3411     } else if (val <= NKF_INT32_C(0xFFFF)) {
3412         *p2 = 0xe0 | (val >> 12);
3413         *p1 = 0x80 | ((val >> 6) & 0x3f);
3414         *p0 = 0x80 | (val        & 0x3f);
3415     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3416         *p2 = 0xe0 |  (val >> 16);
3417         *p1 = 0x80 | ((val >> 12) & 0x3f);
3418         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3419     } else {
3420         *p2 = 0;
3421         *p1 = 0;
3422         *p0 = 0;
3423     }
3424 }
3425 #endif
3426
3427 #ifdef UTF8_INPUT_ENABLE
3428 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3429 {
3430     nkf_char val;
3431     if (c2 >= 0xf8) {
3432         val = -1;
3433     } else if (c2 >= 0xf0){
3434         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3435         val = (c2 & 0x0f) << 18;
3436         val |= (c1 & 0x3f) << 12;
3437         val |= (c0 & 0x3f00) >> 2;
3438         val |= (c0 & 0x3f);
3439     }else if (c2 >= 0xe0){
3440         val = (c2 & 0x0f) << 12;
3441         val |= (c1 & 0x3f) << 6;
3442         val |= (c0 & 0x3f);
3443     }else if (c2 >= 0xc0){
3444         val = (c2 & 0x1f) << 6;
3445         val |= (c1 & 0x3f);
3446     }else{
3447         val = c2;
3448     }
3449     return val;
3450 }
3451
3452 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3453 {
3454     nkf_char c2, c1, c0;
3455     nkf_char ret = 0;
3456     val &= VALUE_MASK;
3457     if (val < 0x80){
3458         *p2 = 0;
3459         *p1 = val;
3460     }else{
3461         w16w_conv(val, &c2, &c1, &c0);
3462         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3463 #ifdef NUMCHAR_OPTION
3464         if (ret > 0){
3465             *p2 = 0;
3466             *p1 = CLASS_UNICODE | val;
3467             ret = 0;
3468         }
3469 #endif
3470     }
3471     return ret;
3472 }
3473 #endif
3474
3475 #ifdef UTF8_INPUT_ENABLE
3476 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3477 {
3478     nkf_char ret = 0;
3479     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3480         (*oconv)(c2, c1);
3481         return 0;
3482     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3483         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3484             return -2;
3485         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3486         c2 = 0;
3487     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3488         /*
3489            return 2;
3490         */
3491         return 1;
3492     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3493     if (ret) return ret;
3494     (*oconv)(c2, c1);
3495     return 0;
3496 }
3497
3498 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3499 {
3500     int ret = 0;
3501
3502     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3503     } else if (is_unicode_bmp(c1)) {
3504         ret = w16e_conv(c1, &c2, &c1);
3505     } else {
3506         c2 = 0;
3507         c1 =  CLASS_UNICODE | c1;
3508     }
3509     if (ret) return ret;
3510     (*oconv)(c2, c1);
3511     return 0;
3512 }
3513
3514 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3515 {
3516     const unsigned short *const *pp;
3517     const unsigned short *const *const *ppp;
3518     static const char no_best_fit_chars_table_C2[] =
3519     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3520         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3521         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3522         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3523     static const char no_best_fit_chars_table_C2_ms[] =
3524     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3525         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3526         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3527         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3528     static const char no_best_fit_chars_table_932_C2[] =
3529     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3530         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3531         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3532         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3533     static const char no_best_fit_chars_table_932_C3[] =
3534     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3535         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3536         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3537         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3538     nkf_char ret = 0;
3539
3540     if(c2 < 0x80){
3541         *p2 = 0;
3542         *p1 = c2;
3543     }else if(c2 < 0xe0){
3544         if(no_best_fit_chars_f){
3545             if(ms_ucs_map_f == UCS_MAP_CP932){
3546                 switch(c2){
3547                 case 0xC2:
3548                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3549                     break;
3550                 case 0xC3:
3551                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3552                     break;
3553                 }
3554             }else if(!cp932inv_f){
3555                 switch(c2){
3556                 case 0xC2:
3557                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3558                     break;
3559                 case 0xC3:
3560                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3561                     break;
3562                 }
3563             }else if(ms_ucs_map_f == UCS_MAP_MS){
3564                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3565             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3566                 switch(c2){
3567                 case 0xC2:
3568                     switch(c1){
3569                     case 0xA2:
3570                     case 0xA3:
3571                     case 0xA5:
3572                     case 0xA6:
3573                     case 0xAC:
3574                     case 0xAF:
3575                     case 0xB8:
3576                         return 1;
3577                     }
3578                     break;
3579                 }
3580             }
3581         }
3582         pp =
3583             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3584             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3585             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3586             utf8_to_euc_2bytes;
3587         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3588     }else if(c0 < 0xF0){
3589         if(no_best_fit_chars_f){
3590             if(ms_ucs_map_f == UCS_MAP_CP932){
3591                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3592             }else if(ms_ucs_map_f == UCS_MAP_MS){
3593                 switch(c2){
3594                 case 0xE2:
3595                     switch(c1){
3596                     case 0x80:
3597                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3598                         break;
3599                     case 0x88:
3600                         if(c0 == 0x92) return 1;
3601                         break;
3602                     }
3603                     break;
3604                 case 0xE3:
3605                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3606                     break;
3607                 }
3608             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3609                 switch(c2){
3610                 case 0xE3:
3611                     switch(c1){
3612                     case 0x82:
3613                             if(c0 == 0x94) return 1;
3614                         break;
3615                     case 0x83:
3616                             if(c0 == 0xBB) return 1;
3617                         break;
3618                     }
3619                     break;
3620                 }
3621             }else{
3622                 switch(c2){
3623                 case 0xE2:
3624                     switch(c1){
3625                     case 0x80:
3626                         if(c0 == 0x95) return 1;
3627                         break;
3628                     case 0x88:
3629                         if(c0 == 0xA5) return 1;
3630                         break;
3631                     }
3632                     break;
3633                 case 0xEF:
3634                     switch(c1){
3635                     case 0xBC:
3636                         if(c0 == 0x8D) return 1;
3637                         break;
3638                     case 0xBD:
3639                         if(c0 == 0x9E && !cp932inv_f) return 1;
3640                         break;
3641                     case 0xBF:
3642                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3643                         break;
3644                     }
3645                     break;
3646                 }
3647             }
3648         }
3649         ppp =
3650             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3651             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3652             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3653             utf8_to_euc_3bytes;
3654         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3655     }else return -1;
3656 #ifdef SHIFTJIS_CP932
3657     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3658         nkf_char s2, s1;
3659         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3660             s2e_conv(s2, s1, p2, p1);
3661         }else{
3662             ret = 1;
3663         }
3664     }
3665 #endif
3666     return ret;
3667 }
3668
3669 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3670 {
3671     nkf_char c2;
3672     const unsigned short *p;
3673     unsigned short val;
3674
3675     if (pp == 0) return 1;
3676
3677     c1 -= 0x80;
3678     if (c1 < 0 || psize <= c1) return 1;
3679     p = pp[c1];
3680     if (p == 0)  return 1;
3681
3682     c0 -= 0x80;
3683     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3684     val = p[c0];
3685     if (val == 0) return 1;
3686     if (no_cp932ext_f && (
3687         (val>>8) == 0x2D || /* NEC special characters */
3688         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3689         )) return 1;
3690
3691     c2 = val >> 8;
3692    if (val > 0x7FFF){
3693         c2 &= 0x7f;
3694         c2 |= PREFIX_EUCG3;
3695     }
3696     if (c2 == SO) c2 = X0201;
3697     c1 = val & 0x7f;
3698     if (p2) *p2 = c2;
3699     if (p1) *p1 = c1;
3700     return 0;
3701 }
3702
3703 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3704 {
3705     int shift = 20;
3706     c &= VALUE_MASK;
3707     while(shift >= 0){
3708         if(c >= 1<<shift){
3709             while(shift >= 0){
3710                 (*f)(0, bin2hex(c>>shift));
3711                 shift -= 4;
3712             }
3713         }else{
3714             shift -= 4;
3715         }
3716     }
3717     return;
3718 }
3719
3720 void encode_fallback_html(nkf_char c)
3721 {
3722     (*oconv)(0, '&');
3723     (*oconv)(0, '#');
3724     c &= VALUE_MASK;
3725     if(c >= NKF_INT32_C(1000000))
3726         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3727     if(c >= NKF_INT32_C(100000))
3728         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3729     if(c >= 10000)
3730         (*oconv)(0, 0x30+(c/10000  )%10);
3731     if(c >= 1000)
3732         (*oconv)(0, 0x30+(c/1000   )%10);
3733     if(c >= 100)
3734         (*oconv)(0, 0x30+(c/100    )%10);
3735     if(c >= 10)
3736         (*oconv)(0, 0x30+(c/10     )%10);
3737     if(c >= 0)
3738         (*oconv)(0, 0x30+ c         %10);
3739     (*oconv)(0, ';');
3740     return;
3741 }
3742
3743 void encode_fallback_xml(nkf_char c)
3744 {
3745     (*oconv)(0, '&');
3746     (*oconv)(0, '#');
3747     (*oconv)(0, 'x');
3748     nkf_each_char_to_hex(oconv, c);
3749     (*oconv)(0, ';');
3750     return;
3751 }
3752
3753 void encode_fallback_java(nkf_char c)
3754 {
3755     (*oconv)(0, '\\');
3756     c &= VALUE_MASK;
3757     if(!is_unicode_bmp(c)){
3758         (*oconv)(0, 'U');
3759         (*oconv)(0, '0');
3760         (*oconv)(0, '0');
3761         (*oconv)(0, bin2hex(c>>20));
3762         (*oconv)(0, bin2hex(c>>16));
3763     }else{
3764         (*oconv)(0, 'u');
3765     }
3766     (*oconv)(0, bin2hex(c>>12));
3767     (*oconv)(0, bin2hex(c>> 8));
3768     (*oconv)(0, bin2hex(c>> 4));
3769     (*oconv)(0, bin2hex(c    ));
3770     return;
3771 }
3772
3773 void encode_fallback_perl(nkf_char c)
3774 {
3775     (*oconv)(0, '\\');
3776     (*oconv)(0, 'x');
3777     (*oconv)(0, '{');
3778     nkf_each_char_to_hex(oconv, c);
3779     (*oconv)(0, '}');
3780     return;
3781 }
3782
3783 void encode_fallback_subchar(nkf_char c)
3784 {
3785     c = unicode_subchar;
3786     (*oconv)((c>>8)&0xFF, c&0xFF);
3787     return;
3788 }
3789 #endif
3790
3791 #ifdef UTF8_OUTPUT_ENABLE
3792 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3793 {
3794     const unsigned short *p;
3795
3796     if (c2 == X0201) {
3797         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3798             switch (c1) {
3799             case 0x20:
3800                 return 0xA0;
3801             case 0x7D:
3802                 return 0xA9;
3803             }
3804         }
3805         p = euc_to_utf8_1byte;
3806 #ifdef X0212_ENABLE
3807     } else if (is_eucg3(c2)){
3808         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3809             return 0xA6;
3810         }
3811         c2 = (c2&0x7f) - 0x21;
3812         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3813             p = x0212_to_utf8_2bytes[c2];
3814         else
3815             return 0;
3816 #endif
3817     } else {
3818         c2 &= 0x7f;
3819         c2 = (c2&0x7f) - 0x21;
3820         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3821             p =
3822                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3823                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3824                 euc_to_utf8_2bytes_ms[c2];
3825         else
3826             return 0;
3827     }
3828     if (!p) return 0;
3829     c1 = (c1 & 0x7f) - 0x21;
3830     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3831         return p[c1];
3832     return 0;
3833 }
3834
3835 void w_oconv(nkf_char c2, nkf_char c1)
3836 {
3837     nkf_char c0;
3838     nkf_char val;
3839
3840     if (output_bom_f) {
3841         output_bom_f = FALSE;
3842         (*o_putc)('\357');
3843         (*o_putc)('\273');
3844         (*o_putc)('\277');
3845     }
3846
3847     if (c2 == EOF) {
3848         (*o_putc)(EOF);
3849         return;
3850     }
3851
3852 #ifdef NUMCHAR_OPTION
3853     if (c2 == 0 && is_unicode_capsule(c1)){
3854         val = c1 & VALUE_MASK;
3855         if (val < 0x80){
3856             (*o_putc)(val);
3857         }else if (val < 0x800){
3858             (*o_putc)(0xC0 | (val >> 6));
3859             (*o_putc)(0x80 | (val & 0x3f));
3860         } else if (val <= NKF_INT32_C(0xFFFF)) {
3861             (*o_putc)(0xE0 | (val >> 12));
3862             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3863             (*o_putc)(0x80 | (val        & 0x3f));
3864         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3865             (*o_putc)(0xF0 | ( val>>18));
3866             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3867             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3868             (*o_putc)(0x80 | ( val      & 0x3f));
3869         }
3870         return;
3871     }
3872 #endif
3873
3874     if (c2 == 0) {
3875         output_mode = ASCII;
3876         (*o_putc)(c1);
3877     } else if (c2 == ISO8859_1) {
3878         output_mode = UTF8;
3879         (*o_putc)(c1 | 0x080);
3880     } else {
3881         output_mode = UTF8;
3882         val = e2w_conv(c2, c1);
3883         if (val){
3884             w16w_conv(val, &c2, &c1, &c0);
3885             (*o_putc)(c2);
3886             if (c1){
3887                 (*o_putc)(c1);
3888                 if (c0) (*o_putc)(c0);
3889             }
3890         }
3891     }
3892 }
3893
3894 void w_oconv16(nkf_char c2, nkf_char c1)
3895 {
3896     if (output_bom_f) {
3897         output_bom_f = FALSE;
3898         if (output_endian == ENDIAN_LITTLE){
3899             (*o_putc)((unsigned char)'\377');
3900             (*o_putc)('\376');
3901         }else{
3902             (*o_putc)('\376');
3903             (*o_putc)((unsigned char)'\377');
3904         }
3905     }
3906
3907     if (c2 == EOF) {
3908         (*o_putc)(EOF);
3909         return;
3910     }
3911
3912     if (c2 == ISO8859_1) {
3913         c2 = 0;
3914         c1 |= 0x80;
3915 #ifdef NUMCHAR_OPTION
3916     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3917         if (is_unicode_bmp(c1)) {
3918             c2 = (c1 >> 8) & 0xff;
3919             c1 &= 0xff;
3920         } else {
3921             c1 &= VALUE_MASK;
3922             if (c1 <= UNICODE_MAX) {
3923                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3924                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3925                 if (output_endian == ENDIAN_LITTLE){
3926                     (*o_putc)(c2 & 0xff);
3927                     (*o_putc)((c2 >> 8) & 0xff);
3928                     (*o_putc)(c1 & 0xff);
3929                     (*o_putc)((c1 >> 8) & 0xff);
3930                 }else{
3931                     (*o_putc)((c2 >> 8) & 0xff);
3932                     (*o_putc)(c2 & 0xff);
3933                     (*o_putc)((c1 >> 8) & 0xff);
3934                     (*o_putc)(c1 & 0xff);
3935                 }
3936             }
3937             return;
3938         }
3939 #endif
3940     } else if (c2) {
3941         nkf_char val = e2w_conv(c2, c1);
3942         c2 = (val >> 8) & 0xff;
3943         c1 = val & 0xff;
3944         if (!val) return;
3945     }
3946     if (output_endian == ENDIAN_LITTLE){
3947         (*o_putc)(c1);
3948         (*o_putc)(c2);
3949     }else{
3950         (*o_putc)(c2);
3951         (*o_putc)(c1);
3952     }
3953 }
3954
3955 void w_oconv32(nkf_char c2, nkf_char c1)
3956 {
3957     if (output_bom_f) {
3958         output_bom_f = FALSE;
3959         if (output_endian == ENDIAN_LITTLE){
3960             (*o_putc)((unsigned char)'\377');
3961             (*o_putc)('\376');
3962             (*o_putc)('\000');
3963             (*o_putc)('\000');
3964         }else{
3965             (*o_putc)('\000');
3966             (*o_putc)('\000');
3967             (*o_putc)('\376');
3968             (*o_putc)((unsigned char)'\377');
3969         }
3970     }
3971
3972     if (c2 == EOF) {
3973         (*o_putc)(EOF);
3974         return;
3975     }
3976
3977     if (c2 == ISO8859_1) {
3978         c1 |= 0x80;
3979 #ifdef NUMCHAR_OPTION
3980     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3981         c1 &= VALUE_MASK;
3982 #endif
3983     } else if (c2) {
3984         c1 = e2w_conv(c2, c1);
3985         if (!c1) return;
3986     }
3987     if (output_endian == ENDIAN_LITTLE){
3988         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3989         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3990         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3991         (*o_putc)('\000');
3992     }else{
3993         (*o_putc)('\000');
3994         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3995         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3996         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3997     }
3998 }
3999 #endif
4000
4001 void e_oconv(nkf_char c2, nkf_char c1)
4002 {
4003 #ifdef NUMCHAR_OPTION
4004     if (c2 == 0 && is_unicode_capsule(c1)){
4005         w16e_conv(c1, &c2, &c1);
4006         if (c2 == 0 && is_unicode_capsule(c1)){
4007             c2 = c1 & VALUE_MASK;
4008             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4009                 /* eucJP-ms UDC */
4010                 c1 &= 0xFFF;
4011                 c2 = c1 / 94;
4012                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4013                 c1 = 0x21 + c1 % 94;
4014                 if (is_eucg3(c2)){
4015                     (*o_putc)(0x8f);
4016                     (*o_putc)((c2 & 0x7f) | 0x080);
4017                     (*o_putc)(c1 | 0x080);
4018                 }else{
4019                     (*o_putc)((c2 & 0x7f) | 0x080);
4020                     (*o_putc)(c1 | 0x080);
4021                 }
4022                 return;
4023             } else {
4024                 if (encode_fallback) (*encode_fallback)(c1);
4025                 return;
4026             }
4027         }
4028     }
4029 #endif
4030     if (c2 == EOF) {
4031         (*o_putc)(EOF);
4032         return;
4033     } else if (c2 == 0) {
4034         output_mode = ASCII;
4035         (*o_putc)(c1);
4036     } else if (c2 == X0201) {
4037         output_mode = JAPANESE_EUC;
4038         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4039     } else if (c2 == ISO8859_1) {
4040         output_mode = ISO8859_1;
4041         (*o_putc)(c1 | 0x080);
4042 #ifdef X0212_ENABLE
4043     } else if (is_eucg3(c2)){
4044         output_mode = JAPANESE_EUC;
4045 #ifdef SHIFTJIS_CP932
4046         if (!cp932inv_f){
4047             nkf_char s2, s1;
4048             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4049                 s2e_conv(s2, s1, &c2, &c1);
4050             }
4051         }
4052 #endif
4053         if (c2 == 0) {
4054             output_mode = ASCII;
4055             (*o_putc)(c1);
4056         }else if (is_eucg3(c2)){
4057             if (x0212_f){
4058                 (*o_putc)(0x8f);
4059                 (*o_putc)((c2 & 0x7f) | 0x080);
4060                 (*o_putc)(c1 | 0x080);
4061             }
4062         }else{
4063             (*o_putc)((c2 & 0x7f) | 0x080);
4064             (*o_putc)(c1 | 0x080);
4065         }
4066 #endif
4067     } else {
4068         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4069             set_iconv(FALSE, 0);
4070             return; /* too late to rescue this char */
4071         }
4072         output_mode = JAPANESE_EUC;
4073         (*o_putc)(c2 | 0x080);
4074         (*o_putc)(c1 | 0x080);
4075     }
4076 }
4077
4078 #ifdef X0212_ENABLE
4079 nkf_char x0212_shift(nkf_char c)
4080 {
4081     nkf_char ret = c;
4082     c &= 0x7f;
4083     if (is_eucg3(ret)){
4084         if (0x75 <= c && c <= 0x7f){
4085             ret = c + (0x109 - 0x75);
4086         }
4087     }else{
4088         if (0x75 <= c && c <= 0x7f){
4089             ret = c + (0x113 - 0x75);
4090         }
4091     }
4092     return ret;
4093 }
4094
4095
4096 nkf_char x0212_unshift(nkf_char c)
4097 {
4098     nkf_char ret = c;
4099     if (0x7f <= c && c <= 0x88){
4100         ret = c + (0x75 - 0x7f);
4101     }else if (0x89 <= c && c <= 0x92){
4102         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4103     }
4104     return ret;
4105 }
4106 #endif /* X0212_ENABLE */
4107
4108 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4109 {
4110     nkf_char ndx;
4111     if (is_eucg3(c2)){
4112         ndx = c2 & 0x7f;
4113         if (x0213_f){
4114             if((0x21 <= ndx && ndx <= 0x2F)){
4115                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4116                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4117                 return 0;
4118             }else if(0x6E <= ndx && ndx <= 0x7E){
4119                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4120                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4121                 return 0;
4122             }
4123             return 1;
4124         }
4125 #ifdef X0212_ENABLE
4126         else if(nkf_isgraph(ndx)){
4127             nkf_char val = 0;
4128             const unsigned short *ptr;
4129             ptr = x0212_shiftjis[ndx - 0x21];
4130             if (ptr){
4131                 val = ptr[(c1 & 0x7f) - 0x21];
4132             }
4133             if (val){
4134                 c2 = val >> 8;
4135                 c1 = val & 0xff;
4136                 if (p2) *p2 = c2;
4137                 if (p1) *p1 = c1;
4138                 return 0;
4139             }
4140             c2 = x0212_shift(c2);
4141         }
4142 #endif /* X0212_ENABLE */
4143     }
4144     if(0x7F < c2) return 1;
4145     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4146     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4147     return 0;
4148 }
4149
4150 void s_oconv(nkf_char c2, nkf_char c1)
4151 {
4152 #ifdef NUMCHAR_OPTION
4153     if (c2 == 0 && is_unicode_capsule(c1)){
4154         w16e_conv(c1, &c2, &c1);
4155         if (c2 == 0 && is_unicode_capsule(c1)){
4156             c2 = c1 & VALUE_MASK;
4157             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4158                 /* CP932 UDC */
4159                 c1 &= 0xFFF;
4160                 c2 = c1 / 188 + 0xF0;
4161                 c1 = c1 % 188;
4162                 c1 += 0x40 + (c1 > 0x3e);
4163                 (*o_putc)(c2);
4164                 (*o_putc)(c1);
4165                 return;
4166             } else {
4167                 if(encode_fallback)(*encode_fallback)(c1);
4168                 return;
4169             }
4170         }
4171     }
4172 #endif
4173     if (c2 == EOF) {
4174         (*o_putc)(EOF);
4175         return;
4176     } else if (c2 == 0) {
4177         output_mode = ASCII;
4178         (*o_putc)(c1);
4179     } else if (c2 == X0201) {
4180         output_mode = SHIFT_JIS;
4181         (*o_putc)(c1|0x80);
4182     } else if (c2 == ISO8859_1) {
4183         output_mode = ISO8859_1;
4184         (*o_putc)(c1 | 0x080);
4185 #ifdef X0212_ENABLE
4186     } else if (is_eucg3(c2)){
4187         output_mode = SHIFT_JIS;
4188         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4189             (*o_putc)(c2);
4190             (*o_putc)(c1);
4191         }
4192 #endif
4193     } else {
4194         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4195             set_iconv(FALSE, 0);
4196             return; /* too late to rescue this char */
4197         }
4198         output_mode = SHIFT_JIS;
4199         e2s_conv(c2, c1, &c2, &c1);
4200
4201 #ifdef SHIFTJIS_CP932
4202         if (cp932inv_f
4203         &n