OSDN Git Service

4155bef9d4df644b9dd4113a6612acd9b32a4065
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.153 2007/12/18 03:04:10 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-12-18"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42
43 #ifndef MIME_DECODE_DEFAULT
44 #define MIME_DECODE_DEFAULT STRICT_MIME
45 #endif
46 #ifndef X0201_DEFAULT
47 #define X0201_DEFAULT TRUE
48 #endif
49
50 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
51 #define MSDOS
52 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
53 #define __WIN32__
54 #endif
55 #endif
56
57 #ifdef PERL_XS
58 #undef OVERWRITE
59 #endif
60
61 #ifndef PERL_XS
62 #include <stdio.h>
63 #endif
64
65 #include <stdlib.h>
66 #include <string.h>
67
68 #if defined(MSDOS) || defined(__OS2__)
69 #include <fcntl.h>
70 #include <io.h>
71 #if defined(_MSC_VER) || defined(__WATCOMC__)
72 #define mktemp _mktemp
73 #endif
74 #endif
75
76 #ifdef MSDOS
77 #ifdef LSI_C
78 #define setbinmode(fp) fsetbin(fp)
79 #elif defined(__DJGPP__)
80 #include <libc/dosio.h>
81 #define setbinmode(fp) djgpp_setbinmode(fp)
82 #else /* Microsoft C, Turbo C */
83 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
84 #endif
85 #else /* UNIX */
86 #define setbinmode(fp)
87 #endif
88
89 #if defined(__DJGPP__)
90 void  djgpp_setbinmode(FILE *fp)
91 {
92     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
93     int fd, m;
94     fd = fileno(fp);
95     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
96     __file_handle_set(fd, m);
97 }
98 #endif
99
100 #ifdef _IOFBF /* SysV and MSDOS, Windows */
101 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
102 #else /* BSD */
103 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
104 #endif
105
106 /*Borland C++ 4.5 EasyWin*/
107 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
108 #define         EASYWIN
109 #ifndef __WIN16__
110 #define __WIN16__
111 #endif
112 #include <windows.h>
113 #endif
114
115 #ifdef OVERWRITE
116 /* added by satoru@isoternet.org */
117 #if defined(__EMX__)
118 #include <sys/types.h>
119 #endif
120 #include <sys/stat.h>
121 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
122 #include <unistd.h>
123 #if defined(__WATCOMC__)
124 #include <sys/utime.h>
125 #else
126 #include <utime.h>
127 #endif
128 #else /* defined(MSDOS) */
129 #ifdef __WIN32__
130 #ifdef __BORLANDC__ /* BCC32 */
131 #include <utime.h>
132 #else /* !defined(__BORLANDC__) */
133 #include <sys/utime.h>
134 #endif /* (__BORLANDC__) */
135 #else /* !defined(__WIN32__) */
136 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
137 #include <sys/utime.h>
138 #elif defined(__TURBOC__) /* BCC */
139 #include <utime.h>
140 #elif defined(LSI_C) /* LSI C */
141 #endif /* (__WIN32__) */
142 #endif
143 #endif
144 #endif
145
146 #define         FALSE   0
147 #define         TRUE    1
148
149 /* state of output_mode and input_mode
150
151    c2           0 means ASCII
152                 X0201
153                 ISO8859_1
154                 X0208
155                 EOF      all termination
156    c1           32bit data
157
158  */
159
160 #define         ASCII           0
161 #define         X0208           1
162 #define         X0201           2
163 #define         ISO8859_1       8
164 #define         X0212      0x2844
165 #define         X0213_1    0x284F
166 #define         X0213_2    0x2850
167
168 /* Input Assumption */
169
170 #define         JIS_INPUT       4
171 #define         EUC_INPUT      16
172 #define         SJIS_INPUT      5
173 #define         LATIN1_INPUT    6
174 #define         FIXED_MIME      7
175 #define         STRICT_MIME     8
176
177 /* MIME ENCODE */
178
179 #define         ISO2022JP       9
180 #define         JAPANESE_EUC   10
181 #define         SHIFT_JIS      11
182
183 #define         UTF8           12
184 #define         UTF8_INPUT     13
185 #define         UTF16_INPUT    1015
186 #define         UTF32_INPUT    1017
187
188 /* byte order */
189
190 #define         ENDIAN_BIG      1234
191 #define         ENDIAN_LITTLE   4321
192 #define         ENDIAN_2143     2143
193 #define         ENDIAN_3412     3412
194
195 /* ASCII CODE */
196
197 #define         BS      0x08
198 #define         TAB     0x09
199 #define         LF      0x0a
200 #define         CR      0x0d
201 #define         ESC     0x1b
202 #define         SP      0x20
203 #define         AT      0x40
204 #define         SSP     0xa0
205 #define         DEL     0x7f
206 #define         SI      0x0f
207 #define         SO      0x0e
208 #define         SSO     0x8e
209 #define         SS3     0x8f
210 #define         CRLF    0x0D0A
211
/*
 * Locale-independent ASCII classification / conversion macros.
 *
 * Every use of a macro parameter is parenthesized so that a compound
 * argument (e.g. `a | b` or `x ? y : z`) is treated as a single value.
 * The arguments are still evaluated more than once, so do not pass
 * expressions with side effects (e.g. `*p++`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; evaluates to 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit character for the low 4 bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the byte above the low 8 bits of c2 (taken as unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and for characters strictly between SP and DEL other
 * than '?', '=', '_', '(', ')', '.' and '"' (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/* True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END]. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
240
241 #define         HOLD_SIZE       1024
242 #if defined(INT_IS_SHORT)
243 #define         IOBUF_SIZE      2048
244 #else
245 #define         IOBUF_SIZE      16384
246 #endif
247
248 #define         DEFAULT_J       'B'
249 #define         DEFAULT_R       'B'
250
251 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
252 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
253
254 #define         RANGE_NUM_MAX   18
255 #define         GETA1   0x22
256 #define         GETA2   0x2e
257
258
259 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
260 #define sizeof_euc_to_utf8_1byte 94
261 #define sizeof_euc_to_utf8_2bytes 94
262 #define sizeof_utf8_to_euc_C2 64
263 #define sizeof_utf8_to_euc_E5B8 64
264 #define sizeof_utf8_to_euc_2bytes 112
265 #define sizeof_utf8_to_euc_3bytes 16
266 #endif
267
268 /* MIME preprocessor */
269
270 #ifdef EASYWIN /*Easy Win */
271 extern POINT _BufferSize;
272 #endif
273
/*
 * Descriptor for one candidate input encoding.
 * Instances are created with positional initializers in input_code_list,
 * so the order of the members must not be changed.
 */
274 struct input_code{
275     char *name;             /* encoding name, e.g. "EUC-JP"; 0 in the list terminator */
276     nkf_char stat;          /* NOTE(review): presumably the detector's current state -- confirm */
277     nkf_char score;         /* NOTE(review): presumably the accumulated detection score -- confirm */
278     nkf_char index;         /* NOTE(review): presumably the number of bytes held in buf -- confirm */
279     nkf_char buf[3];        /* small per-encoding buffer of up to three values */
280     void (*status_func)(struct input_code *, nkf_char);   /* per-encoding status callback; NULL for some list entries */
281     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);   /* input converter for this encoding */
282     int _file_stat;         /* NOTE(review): purpose not visible in this part of the file */
283 };
284
285 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
286
287 #ifndef PERL_XS
288 static const char *CopyRight = COPY_RIGHT;
289 #endif
290 #if !defined(PERL_XS) && !defined(WIN32DLL)
291 static  nkf_char     noconvert(FILE *f);
292 #endif
293 static  void    module_connection(void);
294 static  nkf_char     kanji_convert(FILE *f);
295 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
296 static  nkf_char     push_hold_buf(nkf_char c2);
297 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
298 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
299 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
300 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
301 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
302 /* UCS Mapping
303  * 0: Shift_JIS, eucJP-ascii
304  * 1: eucJP-ms
305  * 2: CP932, CP51932
306  * 3: CP10001
307  */
308 #define UCS_MAP_ASCII   0
309 #define UCS_MAP_MS      1
310 #define UCS_MAP_CP932   2
311 #define UCS_MAP_CP10001 3
312 static int ms_ucs_map_f = UCS_MAP_ASCII;
313 #endif
314 #ifdef UTF8_INPUT_ENABLE
315 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
316 static  int     no_cp932ext_f = FALSE;
317 /* ignore ZERO WIDTH NO-BREAK SPACE */
318 static  int     no_best_fit_chars_f = FALSE;
319 static  int     input_endian = ENDIAN_BIG;
320 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
321 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
322 static  void    encode_fallback_html(nkf_char c);
323 static  void    encode_fallback_xml(nkf_char c);
324 static  void    encode_fallback_java(nkf_char c);
325 static  void    encode_fallback_perl(nkf_char c);
326 static  void    encode_fallback_subchar(nkf_char c);
327 static  void    (*encode_fallback)(nkf_char c) = NULL;
328 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
329 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
330 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
331 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
332 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
333 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
334 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
335 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
336 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
337 static  void    w_status(struct input_code *, nkf_char);
338 #endif
339 #ifdef UTF8_OUTPUT_ENABLE
340 static  int     output_bom_f = FALSE;
341 static  int     output_endian = ENDIAN_BIG;
342 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
343 static  void    w_oconv(nkf_char c2,nkf_char c1);
344 static  void    w_oconv16(nkf_char c2,nkf_char c1);
345 static  void    w_oconv32(nkf_char c2,nkf_char c1);
346 #endif
347 static  void    e_oconv(nkf_char c2,nkf_char c1);
348 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
349 static  void    s_oconv(nkf_char c2,nkf_char c1);
350 static  void    j_oconv(nkf_char c2,nkf_char c1);
351 static  void    fold_conv(nkf_char c2,nkf_char c1);
352 static  void    nl_conv(nkf_char c2,nkf_char c1);
353 static  void    z_conv(nkf_char c2,nkf_char c1);
354 static  void    rot_conv(nkf_char c2,nkf_char c1);
355 static  void    hira_conv(nkf_char c2,nkf_char c1);
356 static  void    base64_conv(nkf_char c2,nkf_char c1);
357 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
358 static  void    no_connection(nkf_char c2,nkf_char c1);
359 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
360
361 static  void    code_score(struct input_code *ptr);
362 static  void    code_status(nkf_char c);
363
364 static  void    std_putc(nkf_char c);
365 static  nkf_char     std_getc(FILE *f);
366 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
367
368 static  nkf_char     broken_getc(FILE *f);
369 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
370
371 static  nkf_char     mime_begin(FILE *f);
372 static  nkf_char     mime_getc(FILE *f);
373 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
374
375 static  void    switch_mime_getc(void);
376 static  void    unswitch_mime_getc(void);
377 static  nkf_char     mime_begin_strict(FILE *f);
378 static  nkf_char     mime_getc_buf(FILE *f);
379 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
380 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
381
382 static  nkf_char     base64decode(nkf_char c);
383 static  void    mime_prechar(nkf_char c2, nkf_char c1);
384 static  void    mime_putc(nkf_char c);
385 static  void    open_mime(nkf_char c);
386 static  void    close_mime(void);
387 static  void    eof_mime(void);
388 static  void    mimeout_addchar(nkf_char c);
389 #ifndef PERL_XS
390 static  void    usage(void);
391 static  void    version(void);
392 static  void    show_configuration(void);
393 #endif
394 static  void    options(unsigned char *c);
395 static  void    reinit(void);
396
397 /* buffers */
398
399 #if !defined(PERL_XS) && !defined(WIN32DLL)
400 static unsigned char   stdibuf[IOBUF_SIZE];
401 static unsigned char   stdobuf[IOBUF_SIZE];
402 #endif
403 static unsigned char   hold_buf[HOLD_SIZE*2];
404 static int             hold_count = 0;
405
406 /* MIME preprocessor fifo */
407
408 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
409 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
410 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
411 static unsigned char           mime_buf[MIME_BUF_SIZE];
412 static unsigned int            mime_top = 0;
413 static unsigned int            mime_last = 0;  /* decoded */
414 static unsigned int            mime_input = 0; /* undecoded */
415 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
416
417 /* flags */
/* Option/state flags with file scope; each holds FALSE/TRUE unless noted otherwise. */
418 static int             unbuf_f = FALSE;        /* when set, main() leaves stdout unbuffered */
419 static int             estab_f = FALSE;        /* NOTE(review): presumably "input code established" -- confirm */
420 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
421 static int             binmode_f = TRUE;       /* binary mode */
422 static int             rot_f = FALSE;          /* rot14/43 mode; NOTE(review): possibly meant ROT13/47 -- confirm */
423 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
424 static int             input_f = FALSE;        /* non fixed input code  */
425 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
426 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
427 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
428 static int             mimebuf_f = FALSE;      /* MIME buffered input */
429 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
430 static int             iso8859_f = FALSE;      /* ISO8859 through */
431 static int             mimeout_f = FALSE;       /* base64 mode */
432 static int             x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
433 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
434
435 #ifdef UNICODE_NORMALIZATION
436 static int nfc_f = FALSE;
437 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
438 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
439 static nkf_char nfc_getc(FILE *f);
440 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
441 #endif
442
443 #ifdef INPUT_OPTION
444 static int cap_f = FALSE;
445 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
446 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
447 static nkf_char cap_getc(FILE *f);
448 static nkf_char cap_ungetc(nkf_char c,FILE *f);
449
450 static int url_f = FALSE;
451 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
452 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
453 static nkf_char url_getc(FILE *f);
454 static nkf_char url_ungetc(nkf_char c,FILE *f);
455 #endif
456
/*
 * 32-bit constants and helpers for values that carry a class tag.
 *
 * NKF_INT32_C(n) appends an L suffix when int is only 16 bits wide
 * (INT_IS_SHORT) so the constants below keep their full width.
 *
 * CLASS_MASK selects the top 8 bits of a value and VALUE_MASK the low
 * 24 bits. A value whose class bits equal CLASS_UNICODE is a
 * "unicode capsule".
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The macro parameter is parenthesized so that a compound argument such as
 * `CLASS_UNICODE | code` is masked as a whole; without the parentheses
 * `&` would bind only to the last operand of the argument.
 */
/* True when the class bits of c equal CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the low 24 bits of c are at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
469
470 #ifdef NUMCHAR_OPTION
471 static int numchar_f = FALSE;
472 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
473 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
474 static nkf_char numchar_getc(FILE *f);
475 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
476 #endif
477
478 #ifdef CHECK_OPTION
479 static int noout_f = FALSE;
480 static void no_putc(nkf_char c);
481 static int debug_f = FALSE;
482 static void debug(const char *str);
483 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
484 #endif
485
486 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
487 #if !defined PERL_XS
488 static  void    print_guessed_code(char *filename);
489 #endif
490 static  void    set_input_codename(char *codename);
491
492 #ifdef EXEC_IO
493 static int exec_f = 0;
494 #endif
495
496 #ifdef SHIFTJIS_CP932
497 /* invert IBM extended characters to others */
498 static int cp51932_f = FALSE;
499
500 /* invert NEC-selected IBM extended characters to IBM extended characters */
501 static int cp932inv_f = TRUE;
502
503 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
504 #endif /* SHIFTJIS_CP932 */
505
506 #ifdef X0212_ENABLE
507 static int x0212_f = FALSE;
508 static nkf_char x0212_shift(nkf_char c);
509 static nkf_char x0212_unshift(nkf_char c);
510 #endif
511 static int x0213_f = FALSE;
512
513 static unsigned char prefix_table[256];
514
515 static void set_code_score(struct input_code *ptr, nkf_char score);
516 static void clr_code_score(struct input_code *ptr, nkf_char score);
517 static void status_disable(struct input_code *ptr);
518 static void status_push_ch(struct input_code *ptr, nkf_char c);
519 static void status_clear(struct input_code *ptr);
520 static void status_reset(struct input_code *ptr);
521 static void status_reinit(struct input_code *ptr);
522 static void status_check(struct input_code *ptr, nkf_char c);
523 static void e_status(struct input_code *, nkf_char);
524 static void s_status(struct input_code *, nkf_char);
525
/*
 * Table of the input encodings known to this file, one struct input_code
 * per encoding. Fields are given positionally:
 *   name, stat, score, index, buf, status_func, iconv_func, _file_stat.
 * The UTF-8/UTF-16/UTF-32 entries exist only when UTF8_INPUT_ENABLE is
 * defined; the UTF-16 and UTF-32 entries have no status_func (NULL).
 * The list ends with an all-zero entry, so its name is a null pointer.
 * Unlike most objects in this file, this array is not declared static.
 */
526 struct input_code input_code_list[] = {
527     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
528     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
529 #ifdef UTF8_INPUT_ENABLE
530     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
531     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
532     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
533 #endif
534     {0}   /* terminator */
535 };
536
537 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
538 static int              base64_count = 0;
539
540 /* X0208 -> ASCII converter */
541
542 /* fold parameter */
543 static int             f_line = 0;    /* chars in line */
544 static int             f_prev = 0;
545 static int             fold_preserve_f = FALSE; /* preserve new lines */
546 static int             fold_f  = FALSE;
547 static int             fold_len  = 0;
548
549 /* options */
550 static unsigned char   kanji_intro = DEFAULT_J;
551 static unsigned char   ascii_intro = DEFAULT_R;
552
553 /* Folding */
554
555 #define FOLD_MARGIN  10
556 #define DEFAULT_FOLD 60
557
558 static int             fold_margin  = FOLD_MARGIN;
559
560 /* converters */
561
562 #ifdef DEFAULT_CODE_JIS
563 #   define  DEFAULT_CONV j_oconv
564 #endif
565 #ifdef DEFAULT_CODE_SJIS
566 #   define  DEFAULT_CONV s_oconv
567 #endif
568 #ifdef DEFAULT_CODE_EUC
569 #   define  DEFAULT_CONV e_oconv
570 #endif
571 #ifdef DEFAULT_CODE_UTF8
572 #   define  DEFAULT_CONV w_oconv
573 #endif
574
575 /* process default */
576 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
577
578 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
579 /* s_iconv or oconv */
580 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
581
582 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
587 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
588 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
589
590 /* static redirections */
591
592 static  void   (*o_putc)(nkf_char c) = std_putc;
593
594 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
595 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
596
597 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
598 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
599
600 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
601
602 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
603 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
604
605 /* for strict mime */
606 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
607 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
608
609 /* Global states */
610 static int output_mode = ASCII,    /* output kanji mode */
611            input_mode =  ASCII,    /* input kanji mode */
612            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
613 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
614
615 /* X0201 / X0208 conversion tables */
616
617 /* X0201 kana conversion table */
618 /* 90-9F A0-DF */
/*
 * Layout: consecutive byte pairs, 64 pairs followed by a final 0x00,0x00 pair.
 * NOTE(review): the pairs look like two-byte JIS X 0208 codes (first byte
 * 0x21 or 0x25); the code that indexes this table is outside this part of
 * the file -- confirm the indexing there.
 */
619 static const unsigned char cv[]= {
620     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
621     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
622     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
623     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
624     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
625     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
626     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
627     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
628     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
629     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
630     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
631     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
632     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
633     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
634     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
635     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
636     0x00,0x00};
637
638
639 /* X0201 kana conversion table for dakuten (voiced sound mark) */
640 /* 90-9F A0-DF */
/*
 * Same layout as cv: 64 byte pairs followed by a final 0x00,0x00 pair.
 * NOTE(review): a 0x00,0x00 pair presumably means the kana has no
 * dakuten form -- confirm against the code that reads this table.
 */
641 static const unsigned char dv[]= {
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
647     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
648     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
649     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
650     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
651     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
653     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
654     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
657     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658     0x00,0x00};
659
660 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
661 /* 90-9F A0-DF */
/*
 * Same layout as cv: 64 byte pairs followed by a final 0x00,0x00 pair.
 * Only five pairs are non-zero (0x25,0x51 .. 0x25,0x5d).
 * NOTE(review): a 0x00,0x00 pair presumably means the kana has no
 * han-dakuten form -- confirm against the code that reads this table.
 */
662 static const unsigned char ev[]= {
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
674     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
678     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
679     0x00,0x00};
680
681
682 /* X0208 kigou (symbol) conversion table */
683 /* 0x8140 - 0x819e */
/*
 * 96 single-byte entries. Every non-zero entry is a printable ASCII code
 * (e.g. 0x2c ',', 0x2e '.', 0x3a ':').
 * NOTE(review): 0x00 presumably marks a symbol with no ASCII equivalent --
 * confirm against the code that reads this table.
 */
684 static const unsigned char fv[] = {
685
686     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
687     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
688     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
689     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
690     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
691     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
692     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
694     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
696     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
697     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
698 } ;
699
700
701
702 static int             file_out_f = FALSE;
703 #ifdef OVERWRITE
704 static int             overwrite_f = FALSE;
705 static int             preserve_time_f = FALSE;
706 static int             backup_f = FALSE;
707 static char            *backup_suffix = "";
708 static char *get_backup_filename(const char *suffix, const char *filename);
709 #endif
710
711 static int nlmode_f = 0;   /* CR, LF, CRLF */
712 static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
713 static nkf_char prev_cr = 0; /* CR or 0 */
714 #ifdef EASYWIN /*Easy Win */
715 static int             end_check;
716 #endif /*Easy Win */
717
718 #define STD_GC_BUFSIZE (256)
719 nkf_char std_gc_buf[STD_GC_BUFSIZE];
720 nkf_char std_gc_ndx;
721
722 #ifdef WIN32DLL
723 #include "nkf32dll.c"
724 #elif defined(PERL_XS)
725 #else /* WIN32DLL */
726 int main(int argc, char **argv)
727 {
728     FILE  *fin;
729     unsigned char  *cp;
730
731     char *outfname = NULL;
732     char *origfname;
733
734 #ifdef EASYWIN /*Easy Win */
735     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
736 #endif
737
738     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
739         cp = (unsigned char *)*argv;
740         options(cp);
741         if (guess_f) {
742 #ifdef CHECK_OPTION
743             int debug_f_back = debug_f;
744 #endif
745 #ifdef EXEC_IO
746             int exec_f_back = exec_f;
747 #endif
748 #ifdef X0212_ENABLE
749             int x0212_f_back = x0212_f;
750 #endif
751             int x0213_f_back = x0213_f;
752             int guess_f_back = guess_f;
753             reinit();
754             guess_f = guess_f_back;
755             mime_f = FALSE;
756 #ifdef CHECK_OPTION
757             debug_f = debug_f_back;
758 #endif
759 #ifdef EXEC_IO
760             exec_f = exec_f_back;
761 #endif
762 #ifdef X0212_ENABLE
763             x0212_f = x0212_f_back;
764 #endif
765             x0213_f = x0213_f_back;
766         }
767 #ifdef EXEC_IO
768         if (exec_f){
769             int fds[2], pid;
770             if (pipe(fds) < 0 || (pid = fork()) < 0){
771                 abort();
772             }
773             if (pid == 0){
774                 if (exec_f > 0){
775                     close(fds[0]);
776                     dup2(fds[1], 1);
777                 }else{
778                     close(fds[1]);
779                     dup2(fds[0], 0);
780                 }
781                 execvp(argv[1], &argv[1]);
782             }
783             if (exec_f > 0){
784                 close(fds[1]);
785                 dup2(fds[0], 0);
786             }else{
787                 close(fds[0]);
788                 dup2(fds[1], 1);
789             }
790             argc = 0;
791             break;
792         }
793 #endif
794     }
795
796     if (binmode_f == TRUE)
797 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
798     if (freopen("","wb",stdout) == NULL)
799         return (-1);
800 #else
801     setbinmode(stdout);
802 #endif
803
804     if (unbuf_f)
805       setbuf(stdout, (char *) NULL);
806     else
807       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
808
809     if (argc == 0) {
810       if (binmode_f == TRUE)
811 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
812       if (freopen("","rb",stdin) == NULL) return (-1);
813 #else
814       setbinmode(stdin);
815 #endif
816       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
817       if (nop_f)
818           noconvert(stdin);
819       else {
820           kanji_convert(stdin);
821           if (guess_f) print_guessed_code(NULL);
822       }
823     } else {
824       int nfiles = argc;
825         int is_argument_error = FALSE;
826       while (argc--) {
827             input_codename = NULL;
828             input_newline = 0;
829 #ifdef CHECK_OPTION
830             iconv_for_check = 0;
831 #endif
832           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
833               perror(*--argv);
834                 *argv++;
835                 is_argument_error = TRUE;
836                 continue;
837           } else {
838 #ifdef OVERWRITE
839               int fd = 0;
840               int fd_backup = 0;
841 #endif
842
843 /* reopen file for stdout */
844               if (file_out_f == TRUE) {
845 #ifdef OVERWRITE
846                   if (overwrite_f){
847                       outfname = malloc(strlen(origfname)
848                                         + strlen(".nkftmpXXXXXX")
849                                         + 1);
850                       if (!outfname){
851                           perror(origfname);
852                           return -1;
853                       }
854                       strcpy(outfname, origfname);
855 #ifdef MSDOS
856                       {
857                           int i;
858                           for (i = strlen(outfname); i; --i){
859                               if (outfname[i - 1] == '/'
860                                   || outfname[i - 1] == '\\'){
861                                   break;
862                               }
863                           }
864                           outfname[i] = '\0';
865                       }
866                       strcat(outfname, "ntXXXXXX");
867                       mktemp(outfname);
868                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
869                                 S_IREAD | S_IWRITE);
870 #else
871                       strcat(outfname, ".nkftmpXXXXXX");
872                       fd = mkstemp(outfname);
873 #endif
874                       if (fd < 0
875                           || (fd_backup = dup(fileno(stdout))) < 0
876                           || dup2(fd, fileno(stdout)) < 0
877                           ){
878                           perror(origfname);
879                           return -1;
880                       }
881                   }else
882 #endif
883                   if(argc == 1) {
884                       outfname = *argv++;
885                       argc--;
886                   } else {
887                       outfname = "nkf.out";
888                   }
889
890                   if(freopen(outfname, "w", stdout) == NULL) {
891                       perror (outfname);
892                       return (-1);
893                   }
894                   if (binmode_f == TRUE) {
895 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
896                       if (freopen("","wb",stdout) == NULL)
897                            return (-1);
898 #else
899                       setbinmode(stdout);
900 #endif
901                   }
902               }
903               if (binmode_f == TRUE)
904 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
905                  if (freopen("","rb",fin) == NULL)
906                     return (-1);
907 #else
908                  setbinmode(fin);
909 #endif
910               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
911               if (nop_f)
912                   noconvert(fin);
913               else {
914                   char *filename = NULL;
915                   kanji_convert(fin);
916                   if (nfiles > 1) filename = origfname;
917                   if (guess_f) print_guessed_code(filename);
918               }
919               fclose(fin);
920 #ifdef OVERWRITE
921               if (overwrite_f) {
922                   struct stat     sb;
923 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
924                   time_t tb[2];
925 #else
926                   struct utimbuf  tb;
927 #endif
928
929                   fflush(stdout);
930                   close(fd);
931                   if (dup2(fd_backup, fileno(stdout)) < 0){
932                       perror("dup2");
933                   }
934                   if (stat(origfname, &sb)) {
935                       fprintf(stderr, "Can't stat %s\n", origfname);
936                   }
937                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
938                   if (chmod(outfname, sb.st_mode)) {
939                       fprintf(stderr, "Can't set permission %s\n", outfname);
940                   }
941
942                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
943                     if(preserve_time_f){
944 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
945                         tb[0] = tb[1] = sb.st_mtime;
946                         if (utime(outfname, tb)) {
947                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
948                         }
949 #else
950                         tb.actime  = sb.st_atime;
951                         tb.modtime = sb.st_mtime;
952                         if (utime(outfname, &tb)) {
953                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
954                         }
955 #endif
956                     }
957                     if(backup_f){
958                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
959 #ifdef MSDOS
960                         unlink(backup_filename);
961 #endif
962                         if (rename(origfname, backup_filename)) {
963                             perror(backup_filename);
964                             fprintf(stderr, "Can't rename %s to %s\n",
965                                     origfname, backup_filename);
966                         }
967                     }else{
968 #ifdef MSDOS
969                         if (unlink(origfname)){
970                             perror(origfname);
971                         }
972 #endif
973                     }
974                   if (rename(outfname, origfname)) {
975                       perror(origfname);
976                       fprintf(stderr, "Can't rename %s to %s\n",
977                               outfname, origfname);
978                   }
979                   free(outfname);
980               }
981 #endif
982           }
983       }
984         if (is_argument_error)
985             return(-1);
986     }
987 #ifdef EASYWIN /*Easy Win */
988     if (file_out_f == FALSE)
989         scanf("%d",&end_check);
990     else
991         fclose(stdout);
992 #else /* for Other OS */
993     if (file_out_f == TRUE)
994         fclose(stdout);
995 #endif /*Easy Win */
996     return (0);
997 }
998 #endif /* WIN32DLL */
999
1000 #ifdef OVERWRITE
/*
 * Build the name of the backup file that belongs to `filename`.
 *
 *   suffix   - backup pattern.  Every '*' in it is replaced by `filename`;
 *              when it contains no '*' it is appended to `filename`.
 *   filename - name of the file that is about to be replaced.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL (after reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    const size_t suffix_length = strlen(suffix);
    const size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t total_length;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* every '*' disappears and one copy of filename takes its place */
        total_length = suffix_length - asterisk_count
            + asterisk_count * filename_length;
    } else {
        /* plain suffix: "<filename><suffix>" */
        total_length = filename_length + suffix_length;
    }

    /* +1 for the terminating NUL */
    backup_filename = malloc(total_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
    } else {
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
        j = total_length;
    }
    backup_filename[j] = '\0';

    return backup_filename;
}
1039 #endif
1040
/*
 * Long ("--name") option table consulted by options().
 *
 *   name  - option text following "--".  A trailing '=' marks an option
 *           that carries a value; options() stops comparing at the '='.
 *   alias - short-option letters that options() parses in place of the
 *           long option.  "" means options() handles the name itself.
 *
 * options() walks the table from the top and takes the first entry that
 * matches, so the order of the entries is significant.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    /* always available */
    {"ic=",                 ""},
    {"oc=",                 ""},
    {"base64",              "jMB"},
    {"euc",                 "e"},
    {"euc-input",           "E"},
    {"fj",                  "jm"},
    {"help",                "v"},
    {"jis",                 "j"},
    {"jis-input",           "J"},
    {"mac",                 "sLm"},
    {"mime",                "jM"},
    {"mime-input",          "m"},
    {"msdos",               "sLw"},
    {"sjis",                "s"},
    {"sjis-input",          "S"},
    {"unix",                "eLu"},
    {"version",             "V"},
    {"windows",             "sLw"},
    {"hiragana",            "h1"},
    {"katakana",            "h2"},
    {"katakana-hiragana",   "h3"},
    {"guess=",              ""},
    {"guess",               "g1"},
    {"cp932",               ""},
    {"no-cp932",            ""},
#ifdef X0212_ENABLE
    {"x0212",               ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8",                "w"},
    {"utf16",               "w16"},
    {"ms-ucs-map",          ""},
    {"fb-skip",             ""},
    {"fb-html",             ""},
    {"fb-xml",              ""},
    {"fb-perl",             ""},
    {"fb-java",             ""},
    {"fb-subchar",          ""},
    {"fb-subchar=",         ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input",          "W"},
    {"utf16-input",         "W16"},
    {"no-cp932ext",         ""},
    {"no-best-fit-chars",   ""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input",       ""},
#endif
#ifdef OVERWRITE
    {"overwrite",           ""},
    {"overwrite=",          ""},
    {"in-place",            ""},
    {"in-place=",           ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input",           ""},
    {"url-input",           ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input",       ""},
#endif
#ifdef CHECK_OPTION
    {"no-output",           ""},
    {"debug",               ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv",            ""},
#endif
#ifdef EXEC_IO
    {"exec-in",             ""},
    {"exec-out",            ""},
#endif
    {"prefix=",             ""},
};

/*
 * Set to 1 by options() when it meets a bare "--"; while it is 1,
 * options() returns immediately without parsing its argument.
 */
static int option_mode = 0;
1122
1123 void options(unsigned char *cp)
1124 {
1125     nkf_char i, j;
1126     unsigned char *p;
1127     unsigned char *cp_back = NULL;
1128     char codeset[32];
1129
1130     if (option_mode==1)
1131         return;
1132     while(*cp && *cp++!='-');
1133     while (*cp || cp_back) {
1134         if(!*cp){
1135             cp = cp_back;
1136             cp_back = NULL;
1137             continue;
1138         }
1139         p = 0;
1140         switch (*cp++) {
1141         case '-':  /* literal options */
1142             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1143                 option_mode = 1;
1144                 return;
1145             }
1146             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1147                 p = (unsigned char *)long_option[i].name;
1148                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1149                 if (*p == cp[j] || cp[j] == SP){
1150                     p = &cp[j] + 1;
1151                     break;
1152                 }
1153                 p = 0;
1154             }
1155             if (p == 0) {
1156                 fprintf(stderr, "unknown long option: --%s\n", cp);
1157                 return;
1158             }
1159             while(*cp && *cp != SP && cp++);
1160             if (long_option[i].alias[0]){
1161                 cp_back = cp;
1162                 cp = (unsigned char *)long_option[i].alias;
1163             }else{
1164                 if (strcmp(long_option[i].name, "ic=") == 0){
1165                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1166                         codeset[i] = nkf_toupper(p[i]);
1167                     }
1168                     codeset[i] = 0;
1169                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1170                         input_f = JIS_INPUT;
1171                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1172                       strcmp(codeset, "CP50220") == 0 ||
1173                       strcmp(codeset, "CP50221") == 0 ||
1174                       strcmp(codeset, "CP50222") == 0){
1175                         input_f = JIS_INPUT;
1176 #ifdef SHIFTJIS_CP932
1177                         cp51932_f = TRUE;
1178 #endif
1179 #ifdef UTF8_OUTPUT_ENABLE
1180                         ms_ucs_map_f = UCS_MAP_CP932;
1181 #endif
1182                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1183                         input_f = JIS_INPUT;
1184 #ifdef X0212_ENABLE
1185                         x0212_f = TRUE;
1186 #endif
1187                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1188                         input_f = JIS_INPUT;
1189 #ifdef X0212_ENABLE
1190                         x0212_f = TRUE;
1191 #endif
1192                         x0213_f = TRUE;
1193                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1194                         input_f = SJIS_INPUT;
1195                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1196                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1197                              strcmp(codeset, "CP932") == 0 ||
1198                              strcmp(codeset, "MS932") == 0){
1199                         input_f = SJIS_INPUT;
1200 #ifdef SHIFTJIS_CP932
1201                         cp51932_f = TRUE;
1202 #endif
1203 #ifdef UTF8_OUTPUT_ENABLE
1204                         ms_ucs_map_f = UCS_MAP_CP932;
1205 #endif
1206                     }else if(strcmp(codeset, "CP10001") == 0){
1207                         input_f = SJIS_INPUT;
1208 #ifdef SHIFTJIS_CP932
1209                         cp51932_f = TRUE;
1210 #endif
1211 #ifdef UTF8_OUTPUT_ENABLE
1212                         ms_ucs_map_f = UCS_MAP_CP10001;
1213 #endif
1214                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1215                              strcmp(codeset, "EUC-JP") == 0){
1216                         input_f = EUC_INPUT;
1217                     }else if(strcmp(codeset, "CP51932") == 0){
1218                         input_f = EUC_INPUT;
1219 #ifdef SHIFTJIS_CP932
1220                         cp51932_f = TRUE;
1221 #endif
1222 #ifdef UTF8_OUTPUT_ENABLE
1223                         ms_ucs_map_f = UCS_MAP_CP932;
1224 #endif
1225                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1226                              strcmp(codeset, "EUCJP-MS") == 0 ||
1227                              strcmp(codeset, "EUCJPMS") == 0){
1228                         input_f = EUC_INPUT;
1229 #ifdef SHIFTJIS_CP932
1230                         cp51932_f = FALSE;
1231 #endif
1232 #ifdef UTF8_OUTPUT_ENABLE
1233                         ms_ucs_map_f = UCS_MAP_MS;
1234 #endif
1235                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1236                              strcmp(codeset, "EUCJP-ASCII") == 0){
1237                         input_f = EUC_INPUT;
1238 #ifdef SHIFTJIS_CP932
1239                         cp51932_f = FALSE;
1240 #endif
1241 #ifdef UTF8_OUTPUT_ENABLE
1242                         ms_ucs_map_f = UCS_MAP_ASCII;
1243 #endif
1244                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1245                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1246                         input_f = SJIS_INPUT;
1247                         x0213_f = TRUE;
1248 #ifdef SHIFTJIS_CP932
1249                         cp51932_f = FALSE;
1250 #endif
1251                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1252                              strcmp(codeset, "EUC-JIS-2004") == 0){
1253                         input_f = EUC_INPUT;
1254                         x0213_f = TRUE;
1255 #ifdef SHIFTJIS_CP932
1256                         cp51932_f = FALSE;
1257 #endif
1258 #ifdef UTF8_INPUT_ENABLE
1259                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1260                              strcmp(codeset, "UTF-8N") == 0 ||
1261                              strcmp(codeset, "UTF-8-BOM") == 0){
1262                         input_f = UTF8_INPUT;
1263 #ifdef UNICODE_NORMALIZATION
1264                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1265                              strcmp(codeset, "UTF-8-MAC") == 0){
1266                         input_f = UTF8_INPUT;
1267                         nfc_f = TRUE;
1268 #endif
1269                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1270                              strcmp(codeset, "UTF-16BE") == 0 ||
1271                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1272                         input_f = UTF16_INPUT;
1273                         input_endian = ENDIAN_BIG;
1274                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1275                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1276                         input_f = UTF16_INPUT;
1277                         input_endian = ENDIAN_LITTLE;
1278                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1279                              strcmp(codeset, "UTF-32BE") == 0 ||
1280                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1281                         input_f = UTF32_INPUT;
1282                         input_endian = ENDIAN_BIG;
1283                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1284                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1285                         input_f = UTF32_INPUT;
1286                         input_endian = ENDIAN_LITTLE;
1287 #endif
1288                     } else {
1289                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1290                     }
1291                     continue;
1292                 }
1293                 if (strcmp(long_option[i].name, "oc=") == 0){
1294                     x0201_f = FALSE;
1295                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1296                         codeset[i] = nkf_toupper(p[i]);
1297                     }
1298                     codeset[i] = 0;
1299                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1300                         output_conv = j_oconv;
1301                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1302                         output_conv = j_oconv;
1303                         no_cp932ext_f = TRUE;
1304 #ifdef SHIFTJIS_CP932
1305                         cp932inv_f = FALSE;
1306 #endif
1307 #ifdef UTF8_OUTPUT_ENABLE
1308                         ms_ucs_map_f = UCS_MAP_CP932;
1309 #endif
1310                     }else if(strcmp(codeset, "CP50220") == 0){
1311                         output_conv = j_oconv;
1312                         x0201_f = TRUE;
1313 #ifdef SHIFTJIS_CP932
1314                         cp932inv_f = FALSE;
1315 #endif
1316 #ifdef UTF8_OUTPUT_ENABLE
1317                         ms_ucs_map_f = UCS_MAP_CP932;
1318 #endif
1319                     }else if(strcmp(codeset, "CP50221") == 0){
1320                         output_conv = j_oconv;
1321 #ifdef SHIFTJIS_CP932
1322                         cp932inv_f = FALSE;
1323 #endif
1324 #ifdef UTF8_OUTPUT_ENABLE
1325                         ms_ucs_map_f = UCS_MAP_CP932;
1326 #endif
1327                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1328                         output_conv = j_oconv;
1329 #ifdef X0212_ENABLE
1330                         x0212_f = TRUE;
1331 #endif
1332 #ifdef SHIFTJIS_CP932
1333                         cp932inv_f = FALSE;
1334 #endif
1335                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1336                         output_conv = j_oconv;
1337 #ifdef X0212_ENABLE
1338                         x0212_f = TRUE;
1339 #endif
1340                         x0213_f = TRUE;
1341 #ifdef SHIFTJIS_CP932
1342                         cp932inv_f = FALSE;
1343 #endif
1344                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1345                         output_conv = s_oconv;
1346                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1347                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1348                              strcmp(codeset, "CP932") == 0 ||
1349                              strcmp(codeset, "MS932") == 0){
1350                         output_conv = s_oconv;
1351 #ifdef UTF8_OUTPUT_ENABLE
1352                         ms_ucs_map_f = UCS_MAP_CP932;
1353 #endif
1354                     }else if(strcmp(codeset, "CP10001") == 0){
1355                         output_conv = s_oconv;
1356 #ifdef UTF8_OUTPUT_ENABLE
1357                         ms_ucs_map_f = UCS_MAP_CP10001;
1358 #endif
1359                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1360                              strcmp(codeset, "EUC-JP") == 0){
1361                         output_conv = e_oconv;
1362                     }else if(strcmp(codeset, "CP51932") == 0){
1363                         output_conv = e_oconv;
1364 #ifdef SHIFTJIS_CP932
1365                         cp932inv_f = FALSE;
1366 #endif
1367 #ifdef UTF8_OUTPUT_ENABLE
1368                         ms_ucs_map_f = UCS_MAP_CP932;
1369 #endif
1370                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1371                              strcmp(codeset, "EUCJP-MS") == 0 ||
1372                              strcmp(codeset, "EUCJPMS") == 0){
1373                         output_conv = e_oconv;
1374 #ifdef X0212_ENABLE
1375                         x0212_f = TRUE;
1376 #endif
1377 #ifdef UTF8_OUTPUT_ENABLE
1378                         ms_ucs_map_f = UCS_MAP_MS;
1379 #endif
1380                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1381                              strcmp(codeset, "EUCJP-ASCII") == 0){
1382                         output_conv = e_oconv;
1383 #ifdef X0212_ENABLE
1384                         x0212_f = TRUE;
1385 #endif
1386 #ifdef UTF8_OUTPUT_ENABLE
1387                         ms_ucs_map_f = UCS_MAP_ASCII;
1388 #endif
1389                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1390                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1391                         output_conv = s_oconv;
1392                         x0213_f = TRUE;
1393 #ifdef SHIFTJIS_CP932
1394                         cp932inv_f = FALSE;
1395 #endif
1396                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1397                              strcmp(codeset, "EUC-JIS-2004") == 0){
1398                         output_conv = e_oconv;
1399 #ifdef X0212_ENABLE
1400                         x0212_f = TRUE;
1401 #endif
1402                         x0213_f = TRUE;
1403 #ifdef SHIFTJIS_CP932
1404                         cp932inv_f = FALSE;
1405 #endif
1406 #ifdef UTF8_OUTPUT_ENABLE
1407                     }else if(strcmp(codeset, "UTF-8") == 0){
1408                         output_conv = w_oconv;
1409                     }else if(strcmp(codeset, "UTF-8N") == 0){
1410                         output_conv = w_oconv;
1411                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1412                         output_conv = w_oconv;
1413                         output_bom_f = TRUE;
1414                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1415                         output_conv = w_oconv16;
1416                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1417                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1418                         output_conv = w_oconv16;
1419                         output_bom_f = TRUE;
1420                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1421                         output_conv = w_oconv16;
1422                         output_endian = ENDIAN_LITTLE;
1423                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1424                         output_conv = w_oconv16;
1425                         output_endian = ENDIAN_LITTLE;
1426                         output_bom_f = TRUE;
1427                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1428                              strcmp(codeset, "UTF-32BE") == 0){
1429                         output_conv = w_oconv32;
1430                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1431                         output_conv = w_oconv32;
1432                         output_bom_f = TRUE;
1433                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1434                         output_conv = w_oconv32;
1435                         output_endian = ENDIAN_LITTLE;
1436                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1437                         output_conv = w_oconv32;
1438                         output_endian = ENDIAN_LITTLE;
1439                         output_bom_f = TRUE;
1440 #endif
1441                     } else {
1442                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1443                     }
1444                     continue;
1445                 }
1446                 if (strcmp(long_option[i].name, "guess=") == 0){
1447                     if (p[0] == '1') {
1448                         guess_f = 2;
1449                     } else {
1450                         guess_f = 1;
1451                     }
1452                     continue;
1453                 }
1454 #ifdef OVERWRITE
1455                 if (strcmp(long_option[i].name, "overwrite") == 0){
1456                     file_out_f = TRUE;
1457                     overwrite_f = TRUE;
1458                     preserve_time_f = TRUE;
1459                     continue;
1460                 }
1461                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1462                     file_out_f = TRUE;
1463                     overwrite_f = TRUE;
1464                     preserve_time_f = TRUE;
1465                     backup_f = TRUE;
1466                     backup_suffix = malloc(strlen((char *) p) + 1);
1467                     strcpy(backup_suffix, (char *) p);
1468                     continue;
1469                 }
1470                 if (strcmp(long_option[i].name, "in-place") == 0){
1471                     file_out_f = TRUE;
1472                     overwrite_f = TRUE;
1473                     preserve_time_f = FALSE;
1474                     continue;
1475                 }
1476                 if (strcmp(long_option[i].name, "in-place=") == 0){
1477                     file_out_f = TRUE;
1478                     overwrite_f = TRUE;
1479                     preserve_time_f = FALSE;
1480                     backup_f = TRUE;
1481                     backup_suffix = malloc(strlen((char *) p) + 1);
1482                     strcpy(backup_suffix, (char *) p);
1483                     continue;
1484                 }
1485 #endif
1486 #ifdef INPUT_OPTION
1487                 if (strcmp(long_option[i].name, "cap-input") == 0){
1488                     cap_f = TRUE;
1489                     continue;
1490                 }
1491                 if (strcmp(long_option[i].name, "url-input") == 0){
1492                     url_f = TRUE;
1493                     continue;
1494                 }
1495 #endif
1496 #ifdef NUMCHAR_OPTION
1497                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1498                     numchar_f = TRUE;
1499                     continue;
1500                 }
1501 #endif
1502 #ifdef CHECK_OPTION
1503                 if (strcmp(long_option[i].name, "no-output") == 0){
1504                     noout_f = TRUE;
1505                     continue;
1506                 }
1507                 if (strcmp(long_option[i].name, "debug") == 0){
1508                     debug_f = TRUE;
1509                     continue;
1510                 }
1511 #endif
1512                 if (strcmp(long_option[i].name, "cp932") == 0){
1513 #ifdef SHIFTJIS_CP932
1514                     cp51932_f = TRUE;
1515                     cp932inv_f = TRUE;
1516 #endif
1517 #ifdef UTF8_OUTPUT_ENABLE
1518                     ms_ucs_map_f = UCS_MAP_CP932;
1519 #endif
1520                     continue;
1521                 }
1522                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1523 #ifdef SHIFTJIS_CP932
1524                     cp51932_f = FALSE;
1525                     cp932inv_f = FALSE;
1526 #endif
1527 #ifdef UTF8_OUTPUT_ENABLE
1528                     ms_ucs_map_f = UCS_MAP_ASCII;
1529 #endif
1530                     continue;
1531                 }
1532 #ifdef SHIFTJIS_CP932
1533                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1534                     cp932inv_f = TRUE;
1535                     continue;
1536                 }
1537 #endif
1538
1539 #ifdef X0212_ENABLE
1540                 if (strcmp(long_option[i].name, "x0212") == 0){
1541                     x0212_f = TRUE;
1542                     continue;
1543                 }
1544 #endif
1545
1546 #ifdef EXEC_IO
1547                   if (strcmp(long_option[i].name, "exec-in") == 0){
1548                       exec_f = 1;
1549                       return;
1550                   }
1551                   if (strcmp(long_option[i].name, "exec-out") == 0){
1552                       exec_f = -1;
1553                       return;
1554                   }
1555 #endif
1556 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1557                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1558                     no_cp932ext_f = TRUE;
1559                     continue;
1560                 }
1561                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1562                     no_best_fit_chars_f = TRUE;
1563                     continue;
1564                 }
1565                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1566                     encode_fallback = NULL;
1567                     continue;
1568                 }
1569                 if (strcmp(long_option[i].name, "fb-html") == 0){
1570                     encode_fallback = encode_fallback_html;
1571                     continue;
1572                 }
1573                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1574                     encode_fallback = encode_fallback_xml;
1575                     continue;
1576                 }
1577                 if (strcmp(long_option[i].name, "fb-java") == 0){
1578                     encode_fallback = encode_fallback_java;
1579                     continue;
1580                 }
1581                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1582                     encode_fallback = encode_fallback_perl;
1583                     continue;
1584                 }
1585                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1586                     encode_fallback = encode_fallback_subchar;
1587                     continue;
1588                 }
1589                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1590                     encode_fallback = encode_fallback_subchar;
1591                     unicode_subchar = 0;
1592                     if (p[0] != '0'){
1593                         /* decimal number */
1594                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1595                             unicode_subchar *= 10;
1596                             unicode_subchar += hex2bin(p[i]);
1597                         }
1598                     }else if(p[1] == 'x' || p[1] == 'X'){
1599                         /* hexadecimal number */
1600                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1601                             unicode_subchar <<= 4;
1602                             unicode_subchar |= hex2bin(p[i]);
1603                         }
1604                     }else{
1605                         /* octal number */
1606                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1607                             unicode_subchar *= 8;
1608                             unicode_subchar += hex2bin(p[i]);
1609                         }
1610                     }
1611                     w16e_conv(unicode_subchar, &i, &j);
1612                     unicode_subchar = i<<8 | j;
1613                     continue;
1614                 }
1615 #endif
1616 #ifdef UTF8_OUTPUT_ENABLE
1617                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1618                     ms_ucs_map_f = UCS_MAP_MS;
1619                     continue;
1620                 }
1621 #endif
1622 #ifdef UNICODE_NORMALIZATION
1623                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1624                     input_f = UTF8_INPUT;
1625                     nfc_f = TRUE;
1626                     continue;
1627                 }
1628 #endif
1629                 if (strcmp(long_option[i].name, "prefix=") == 0){
1630                     if (nkf_isgraph(p[0])){
1631                         for (i = 1; nkf_isgraph(p[i]); i++){
1632                             prefix_table[p[i]] = p[0];
1633                         }
1634                     }
1635                     continue;
1636                 }
1637             }
1638             continue;
1639         case 'b':           /* buffered mode */
1640             unbuf_f = FALSE;
1641             continue;
1642         case 'u':           /* non bufferd mode */
1643             unbuf_f = TRUE;
1644             continue;
1645         case 't':           /* transparent mode */
1646             if (*cp=='1') {
1647                 /* alias of -t */
1648                 nop_f = TRUE;
1649                 *cp++;
1650             } else if (*cp=='2') {
1651                 /*
1652                  * -t with put/get
1653                  *
1654                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1655                  *
1656                  */
1657                 nop_f = 2;
1658                 *cp++;
1659             } else
1660                 nop_f = TRUE;
1661             continue;
1662         case 'j':           /* JIS output */
1663         case 'n':
1664             output_conv = j_oconv;
1665             continue;
1666         case 'e':           /* AT&T EUC output */
1667             output_conv = e_oconv;
1668             cp932inv_f = FALSE;
1669             continue;
1670         case 's':           /* SJIS output */
1671             output_conv = s_oconv;
1672             continue;
1673         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1674             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1675             input_f = LATIN1_INPUT;
1676             continue;
1677         case 'i':           /* Kanji IN ESC-$-@/B */
1678             if (*cp=='@'||*cp=='B')
1679                 kanji_intro = *cp++;
1680             continue;
1681         case 'o':           /* ASCII IN ESC-(-J/B */
1682             if (*cp=='J'||*cp=='B'||*cp=='H')
1683                 ascii_intro = *cp++;
1684             continue;
1685         case 'h':
1686             /*
1687                 bit:1   katakana->hiragana
1688                 bit:2   hiragana->katakana
1689             */
1690             if ('9'>= *cp && *cp>='0')
1691                 hira_f |= (*cp++ -'0');
1692             else
1693                 hira_f |= 1;
1694             continue;
1695         case 'r':
1696             rot_f = TRUE;
1697             continue;
1698 #if defined(MSDOS) || defined(__OS2__)
1699         case 'T':
1700             binmode_f = FALSE;
1701             continue;
1702 #endif
1703 #ifndef PERL_XS
1704         case 'V':
1705             show_configuration();
1706             exit(1);
1707             break;
1708         case 'v':
1709             usage();
1710             exit(1);
1711             break;
1712 #endif
1713 #ifdef UTF8_OUTPUT_ENABLE
1714         case 'w':           /* UTF-8 output */
1715             if (cp[0] == '8') {
1716                 output_conv = w_oconv; cp++;
1717                 if (cp[0] == '0'){
1718                     cp++;
1719                 } else {
1720                     output_bom_f = TRUE;
1721                 }
1722             } else {
1723                 if ('1'== cp[0] && '6'==cp[1]) {
1724                     output_conv = w_oconv16; cp+=2;
1725                 } else if ('3'== cp[0] && '2'==cp[1]) {
1726                     output_conv = w_oconv32; cp+=2;
1727                 } else {
1728                     output_conv = w_oconv;
1729                     continue;
1730                 }
1731                 if (cp[0]=='L') {
1732                     cp++;
1733                     output_endian = ENDIAN_LITTLE;
1734                 } else if (cp[0] == 'B') {
1735                     cp++;
1736                 } else {
1737                     continue;
1738                 }
1739                 if (cp[0] == '0'){
1740                     cp++;
1741                 } else {
1742                     output_bom_f = TRUE;
1743                 }
1744             }
1745             continue;
1746 #endif
1747 #ifdef UTF8_INPUT_ENABLE
1748         case 'W':           /* UTF input */
1749             if (cp[0] == '8') {
1750                 cp++;
1751                 input_f = UTF8_INPUT;
1752             }else{
1753                 if ('1'== cp[0] && '6'==cp[1]) {
1754                     cp += 2;
1755                     input_f = UTF16_INPUT;
1756                     input_endian = ENDIAN_BIG;
1757                 } else if ('3'== cp[0] && '2'==cp[1]) {
1758                     cp += 2;
1759                     input_f = UTF32_INPUT;
1760                     input_endian = ENDIAN_BIG;
1761                 } else {
1762                     input_f = UTF8_INPUT;
1763                     continue;
1764                 }
1765                 if (cp[0]=='L') {
1766                     cp++;
1767                     input_endian = ENDIAN_LITTLE;
1768                 } else if (cp[0] == 'B') {
1769                     cp++;
1770                 }
1771             }
1772             continue;
1773 #endif
1774         /* Input code assumption */
1775         case 'J':   /* JIS input */
1776             input_f = JIS_INPUT;
1777             continue;
1778         case 'E':   /* AT&T EUC input */
1779             input_f = EUC_INPUT;
1780             continue;
1781         case 'S':   /* MS Kanji input */
1782             input_f = SJIS_INPUT;
1783             continue;
1784         case 'Z':   /* Convert X0208 alphabet to asii */
1785             /* alpha_f
1786                bit:0   Convert JIS X 0208 Alphabet to ASCII
1787                bit:1   Convert Kankaku to one space
1788                bit:2   Convert Kankaku to two spaces
1789                bit:3   Convert HTML Entity
1790                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1791             */
1792             while ('0'<= *cp && *cp <='9') {
1793                 alpha_f |= 1 << (*cp++ - '0');
1794             }
1795             if (!alpha_f) alpha_f = 1;
1796             continue;
1797         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1798             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1799             /* accept  X0201
1800                     ESC-(-I     in JIS, EUC, MS Kanji
1801                     SI/SO       in JIS, EUC, MS Kanji
1802                     SSO         in EUC, JIS, not in MS Kanji
1803                     MS Kanji (0xa0-0xdf)
1804                output  X0201
1805                     ESC-(-I     in JIS (0x20-0x5f)
1806                     SSO         in EUC (0xa0-0xdf)
1807                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1808             */
1809             continue;
1810         case 'X':   /* Convert X0201 kana to X0208 */
1811             x0201_f = TRUE;
1812             continue;
1813         case 'F':   /* prserve new lines */
1814             fold_preserve_f = TRUE;
1815         case 'f':   /* folding -f60 or -f */
1816             fold_f = TRUE;
1817             fold_len = 0;
1818             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1819                 fold_len *= 10;
1820                 fold_len += *cp++ - '0';
1821             }
1822             if (!(0<fold_len && fold_len<BUFSIZ))
1823                 fold_len = DEFAULT_FOLD;
1824             if (*cp=='-') {
1825                 fold_margin = 0;
1826                 cp++;
1827                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1828                     fold_margin *= 10;
1829                     fold_margin += *cp++ - '0';
1830                 }
1831             }
1832             continue;
1833         case 'm':   /* MIME support */
1834             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1835             if (*cp=='B'||*cp=='Q') {
1836                 mime_decode_mode = *cp++;
1837                 mimebuf_f = FIXED_MIME;
1838             } else if (*cp=='N') {
1839                 mime_f = TRUE; cp++;
1840             } else if (*cp=='S') {
1841                 mime_f = STRICT_MIME; cp++;
1842             } else if (*cp=='0') {
1843                 mime_decode_f = FALSE;
1844                 mime_f = FALSE; cp++;
1845             }
1846             continue;
1847         case 'M':   /* MIME output */
1848             if (*cp=='B') {
1849                 mimeout_mode = 'B';
1850                 mimeout_f = FIXED_MIME; cp++;
1851             } else if (*cp=='Q') {
1852                 mimeout_mode = 'Q';
1853                 mimeout_f = FIXED_MIME; cp++;
1854             } else {
1855                 mimeout_f = TRUE;
1856             }
1857             continue;
1858         case 'B':   /* Broken JIS support */
1859             /*  bit:0   no ESC JIS
1860                 bit:1   allow any x on ESC-(-x or ESC-$-x
1861                 bit:2   reset to ascii on NL
1862             */
1863             if ('9'>= *cp && *cp>='0')
1864                 broken_f |= 1<<(*cp++ -'0');
1865             else
1866                 broken_f |= TRUE;
1867             continue;
1868 #ifndef PERL_XS
1869         case 'O':/* for Output file */
1870             file_out_f = TRUE;
1871             continue;
1872 #endif
1873         case 'c':/* add cr code */
1874             nlmode_f = CRLF;
1875             continue;
1876         case 'd':/* delete cr code */
1877             nlmode_f = LF;
1878             continue;
1879         case 'I':   /* ISO-2022-JP output */
1880             iso2022jp_f = TRUE;
1881             continue;
1882         case 'L':  /* line mode */
1883             if (*cp=='u') {         /* unix */
1884                 nlmode_f = LF; cp++;
1885             } else if (*cp=='m') { /* mac */
1886                 nlmode_f = CR; cp++;
1887             } else if (*cp=='w') { /* windows */
1888                 nlmode_f = CRLF; cp++;
1889             } else if (*cp=='0') { /* no conversion  */
1890                 nlmode_f = 0; cp++;
1891             }
1892             continue;
1893 #ifndef PERL_XS
1894         case 'g':
1895             if (*cp == '1') {
1896                 guess_f = 2;
1897                 cp++;
1898             } else if (*cp == '0') {
1899                 guess_f = 1;
1900                 cp++;
1901             } else {
1902                 guess_f = 1;
1903             }
1904             continue;
1905 #endif
1906         case SP:
1907         /* module muliple options in a string are allowed for Perl moudle  */
1908             while(*cp && *cp++!='-');
1909             continue;
1910         default:
1911             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1912             /* bogus option but ignored */
1913             continue;
1914         }
1915     }
1916 }
1917
1918 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1919 {
1920     if (iconv_func){
1921         struct input_code *p = input_code_list;
1922         while (p->name){
1923             if (iconv_func == p->iconv_func){
1924                 return p;
1925             }
1926             p++;
1927         }
1928     }
1929     return 0;
1930 }
1931
1932 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1933 {
1934 #ifdef INPUT_CODE_FIX
1935     if (f || !input_f)
1936 #endif
1937         if (estab_f != f){
1938             estab_f = f;
1939         }
1940
1941     if (iconv_func
1942 #ifdef INPUT_CODE_FIX
1943         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1944 #endif
1945         ){
1946         iconv = iconv_func;
1947     }
1948 #ifdef CHECK_OPTION
1949     if (estab_f && iconv_for_check != iconv){
1950         struct input_code *p = find_inputcode_byfunc(iconv);
1951         if (p){
1952             set_input_codename(p->name);
1953             debug(p->name);
1954         }
1955         iconv_for_check = iconv;
1956     }
1957 #endif
1958 }
1959
/*
 * Score flag bits.  set_code_score() ORs them into input_code.score and
 * clr_code_score() masks them out again; every flag is a distinct bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped by CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* nonexistent character */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * Score lookup tables, indexed by the low nibble of the first byte.
 * code_score() consults score_table_A0 when (c2 & 0x70) == 0x20 and
 * score_table_F0 when (c2 & 0x70) == 0x70.
 *
 * The element type is unsigned char: SCORE_ERROR is 128, which does not fit
 * in a signed char and would turn into a negative value (and then
 * sign-extend when OR-ed into the score) where plain char is signed.
 */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1984
1985 void set_code_score(struct input_code *ptr, nkf_char score)
1986 {
1987     if (ptr){
1988         ptr->score |= score;
1989     }
1990 }
1991
1992 void clr_code_score(struct input_code *ptr, nkf_char score)
1993 {
1994     if (ptr){
1995         ptr->score &= ~score;
1996     }
1997 }
1998
1999 void code_score(struct input_code *ptr)
2000 {
2001     nkf_char c2 = ptr->buf[0];
2002 #ifdef UTF8_OUTPUT_ENABLE
2003     nkf_char c1 = ptr->buf[1];
2004 #endif
2005     if (c2 < 0){
2006         set_code_score(ptr, SCORE_ERROR);
2007     }else if (c2 == SSO){
2008         set_code_score(ptr, SCORE_KANA);
2009     }else if (c2 == 0x8f){
2010         set_code_score(ptr, SCORE_X0212);
2011 #ifdef UTF8_OUTPUT_ENABLE
2012     }else if (!e2w_conv(c2, c1)){
2013         set_code_score(ptr, SCORE_NO_EXIST);
2014 #endif
2015     }else if ((c2 & 0x70) == 0x20){
2016         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2017     }else if ((c2 & 0x70) == 0x70){
2018         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2019     }else if ((c2 & 0x70) >= 0x50){
2020         set_code_score(ptr, SCORE_L2);
2021     }
2022 }
2023
2024 void status_disable(struct input_code *ptr)
2025 {
2026     ptr->stat = -1;
2027     ptr->buf[0] = -1;
2028     code_score(ptr);
2029     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2030 }
2031
2032 void status_push_ch(struct input_code *ptr, nkf_char c)
2033 {
2034     ptr->buf[ptr->index++] = c;
2035 }
2036
2037 void status_clear(struct input_code *ptr)
2038 {
2039     ptr->stat = 0;
2040     ptr->index = 0;
2041 }
2042
2043 void status_reset(struct input_code *ptr)
2044 {
2045     status_clear(ptr);
2046     ptr->score = SCORE_INIT;
2047 }
2048
2049 void status_reinit(struct input_code *ptr)
2050 {
2051     status_reset(ptr);
2052     ptr->_file_stat = 0;
2053 }
2054
2055 void status_check(struct input_code *ptr, nkf_char c)
2056 {
2057     if (c <= DEL && estab_f){
2058         status_reset(ptr);
2059     }
2060 }
2061
2062 void s_status(struct input_code *ptr, nkf_char c)
2063 {
2064     switch(ptr->stat){
2065       case -1:
2066           status_check(ptr, c);
2067           break;
2068       case 0:
2069           if (c <= DEL){
2070               break;
2071 #ifdef NUMCHAR_OPTION
2072           }else if (is_unicode_capsule(c)){
2073               break;
2074 #endif
2075           }else if (0xa1 <= c && c <= 0xdf){
2076               status_push_ch(ptr, SSO);
2077               status_push_ch(ptr, c);
2078               code_score(ptr);
2079               status_clear(ptr);
2080           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2081               ptr->stat = 1;
2082               status_push_ch(ptr, c);
2083           }else if (0xed <= c && c <= 0xee){
2084               ptr->stat = 3;
2085               status_push_ch(ptr, c);
2086 #ifdef SHIFTJIS_CP932
2087           }else if (is_ibmext_in_sjis(c)){
2088               ptr->stat = 2;
2089               status_push_ch(ptr, c);
2090 #endif /* SHIFTJIS_CP932 */
2091 #ifdef X0212_ENABLE
2092           }else if (0xf0 <= c && c <= 0xfc){
2093               ptr->stat = 1;
2094               status_push_ch(ptr, c);
2095 #endif /* X0212_ENABLE */
2096           }else{
2097               status_disable(ptr);
2098           }
2099           break;
2100       case 1:
2101           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2102               status_push_ch(ptr, c);
2103               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2104               code_score(ptr);
2105               status_clear(ptr);
2106           }else{
2107               status_disable(ptr);
2108           }
2109           break;
2110       case 2:
2111 #ifdef SHIFTJIS_CP932
2112         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2113             status_push_ch(ptr, c);
2114             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2115                 set_code_score(ptr, SCORE_CP932);
2116                 status_clear(ptr);
2117                 break;
2118             }
2119         }
2120 #endif /* SHIFTJIS_CP932 */
2121         status_disable(ptr);
2122           break;
2123       case 3:
2124           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2125               status_push_ch(ptr, c);
2126               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2127             set_code_score(ptr, SCORE_CP932);
2128             status_clear(ptr);
2129           }else{
2130               status_disable(ptr);
2131           }
2132           break;
2133     }
2134 }
2135
2136 void e_status(struct input_code *ptr, nkf_char c)
2137 {
2138     switch (ptr->stat){
2139       case -1:
2140           status_check(ptr, c);
2141           break;
2142       case 0:
2143           if (c <= DEL){
2144               break;
2145 #ifdef NUMCHAR_OPTION
2146           }else if (is_unicode_capsule(c)){
2147               break;
2148 #endif
2149           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2150               ptr->stat = 1;
2151               status_push_ch(ptr, c);
2152 #ifdef X0212_ENABLE
2153           }else if (0x8f == c){
2154               ptr->stat = 2;
2155               status_push_ch(ptr, c);
2156 #endif /* X0212_ENABLE */
2157           }else{
2158               status_disable(ptr);
2159           }
2160           break;
2161       case 1:
2162           if (0xa1 <= c && c <= 0xfe){
2163               status_push_ch(ptr, c);
2164               code_score(ptr);
2165               status_clear(ptr);
2166           }else{
2167               status_disable(ptr);
2168           }
2169           break;
2170 #ifdef X0212_ENABLE
2171       case 2:
2172           if (0xa1 <= c && c <= 0xfe){
2173               ptr->stat = 1;
2174               status_push_ch(ptr, c);
2175           }else{
2176               status_disable(ptr);
2177           }
2178 #endif /* X0212_ENABLE */
2179     }
2180 }
2181
#ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte c to the state machine of the w_* candidate.
 *
 * ptr->stat values:
 *   -1  candidate disabled; only status_check() is run
 *    0  idle; a first byte 0xc0-0xdf, 0xe0-0xef or 0xf0-0xf4 moves to state
 *       1, 2 or 3 respectively
 *    1,2  continuation bytes (0x80-0xbf) are collected until more than stat
 *       bytes are buffered, then the sequence is passed to w2e_conv() and
 *       scored, unless it is EF BB BF, which is not scored
 *    3  continuation bytes are collected while index < stat; the next one
 *       clears the state without scoring
 * Any byte that does not fit the current state disables the candidate.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    const int is_continuation = (0x80 <= c && c <= 0xbf);

    switch (ptr->stat) {
    case -1:
        status_check(ptr, c);
        return;

    case 0:
        if (c <= DEL) {
            return;
        }
#ifdef NUMCHAR_OPTION
        if (is_unicode_capsule(c)) {
            return;
        }
#endif
        if (0xc0 <= c && c <= 0xdf) {
            ptr->stat = 1;
        } else if (0xe0 <= c && c <= 0xef) {
            ptr->stat = 2;
        } else if (0xf0 <= c && c <= 0xf4) {
            ptr->stat = 3;
        } else {
            status_disable(ptr);
            return;
        }
        status_push_ch(ptr, c);
        return;

    case 1:
    case 2:
        if (!is_continuation) {
            status_disable(ptr);
            return;
        }
        status_push_ch(ptr, c);
        if (ptr->index <= ptr->stat) {
            /* sequence still incomplete */
            return;
        }
        {
            /* Must be sampled before w2e_conv() overwrites buf[0..1]. */
            const int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                                && ptr->buf[2] == 0xbf);

            w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                     &ptr->buf[0], &ptr->buf[1]);
            if (!is_bom) {
                code_score(ptr);
            }
            status_clear(ptr);
        }
        return;

    case 3:
        if (!is_continuation) {
            status_disable(ptr);
            return;
        }
        if (ptr->index < ptr->stat) {
            status_push_ch(ptr, c);
        } else {
            status_clear(ptr);
        }
        return;
    }
}
#endif
2241
2242 void code_status(nkf_char c)
2243 {
2244     int action_flag = 1;
2245     struct input_code *result = 0;
2246     struct input_code *p = input_code_list;
2247     while (p->name){
2248         if (!p->status_func) {
2249             ++p;
2250             continue;
2251         }
2252         if (!p->status_func)
2253             continue;
2254         (p->status_func)(p, c);
2255         if (p->stat > 0){
2256             action_flag = 0;
2257         }else if(p->stat == 0){
2258             if (result){
2259                 action_flag = 0;
2260             }else{
2261                 result = p;
2262             }
2263         }
2264         ++p;
2265     }
2266
2267     if (action_flag){
2268         if (result && !estab_f){
2269             set_iconv(TRUE, result->iconv_func);
2270         }else if (c <= DEL){
2271             struct input_code *ptr = input_code_list;
2272             while (ptr->name){
2273                 status_reset(ptr);
2274                 ++ptr;
2275             }
2276         }
2277     }
2278 }
2279
2280 #ifndef WIN32DLL
2281 nkf_char std_getc(FILE *f)
2282 {
2283     if (std_gc_ndx){
2284         return std_gc_buf[--std_gc_ndx];
2285     }
2286     return getc(f);
2287 }
2288 #endif /*WIN32DLL*/
2289
2290 nkf_char std_ungetc(nkf_char c, FILE *f)
2291 {
2292     if (std_gc_ndx == STD_GC_BUFSIZE){
2293         return EOF;
2294     }
2295     std_gc_buf[std_gc_ndx++] = c;
2296     return c;
2297 }
2298
2299 #ifndef WIN32DLL
2300 void std_putc(nkf_char c)
2301 {
2302     if(c!=EOF)
2303       putchar(c);
2304 }
2305 #endif /*WIN32DLL*/
2306
2307 #if !defined(PERL_XS) && !defined(WIN32DLL)
2308 nkf_char noconvert(FILE *f)
2309 {
2310     nkf_char    c;
2311
2312     if (nop_f == 2)
2313         module_connection();
2314     while ((c = (*i_getc)(f)) != EOF)
2315       (*o_putc)(c);
2316     (*o_putc)(EOF);
2317     return 1;
2318 }
2319 #endif
2320
/*
 * Wire up the output and input filter chains from the current option flags.
 *
 * Output side: oconv starts as output_conv and o_putc as std_putc.  Each
 * enabled option then wraps the current function: the previous value is
 * saved in the option's own o_* pointer and oconv/o_putc is replaced by the
 * option's filter.  The filter installed last is therefore the one that is
 * called first.
 *
 * Input side: i_getc/i_ungetc start as std_getc/std_ungetc and are wrapped
 * in the same way.
 *
 * Finally the input converter is selected from input_f with set_iconv()
 * and every entry of input_code_list is re-initialised.
 */
2321 void module_connection(void)
2322 {
2323     oconv = output_conv;
2324     o_putc = std_putc;
2325
2326     /* replace continuation module, from output side */
2327
2328     /* output redirection */
2329 #ifdef CHECK_OPTION
    /* with noout_f or guess_f set, characters go to no_putc instead of std_putc */
2330     if (noout_f || guess_f){
2331         o_putc = no_putc;
2332     }
2333 #endif
2334     if (mimeout_f) {
2335         o_mputc = o_putc;
2336         o_putc = mime_putc;
        /* base64_conv is inserted only when mimeout_f is exactly TRUE */
2337         if (mimeout_f == TRUE) {
2338             o_base64conv = oconv; oconv = base64_conv;
2339         }
2340         /* base64_count = 0; */
2341     }
2342
2343     if (nlmode_f || guess_f) {
2344         o_nlconv = oconv; oconv = nl_conv;
2345     }
2346     if (rot_f) {
2347         o_rot_conv = oconv; oconv = rot_conv;
2348     }
2349     if (iso2022jp_f) {
2350         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2351     }
2352     if (hira_f) {
2353         o_hira_conv = oconv; oconv = hira_conv;
2354     }
2355     if (fold_f) {
2356         o_fconv = oconv; oconv = fold_conv;
2357         f_line = 0;
2358     }
2359     if (alpha_f || x0201_f) {
2360         o_zconv = oconv; oconv = z_conv;
2361     }
2362
2363     i_getc = std_getc;
2364     i_ungetc = std_ungetc;
2365     /* input redirection */
2366 #ifdef INPUT_OPTION
2367     if (cap_f){
2368         i_cgetc = i_getc; i_getc = cap_getc;
2369         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2370     }
2371     if (url_f){
2372         i_ugetc = i_getc; i_getc = url_getc;
2373         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2374     }
2375 #endif
2376 #ifdef NUMCHAR_OPTION
2377     if (numchar_f){
2378         i_ngetc = i_getc; i_getc = numchar_getc;
2379         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2380     }
2381 #endif
2382 #ifdef UNICODE_NORMALIZATION
2383     if (nfc_f && input_f == UTF8_INPUT){
2384         i_nfc_getc = i_getc; i_getc = nfc_getc;
2385         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2386     }
2387 #endif
2388     if (mime_f && mimebuf_f==FIXED_MIME) {
2389         i_mgetc = i_getc; i_getc = mime_getc;
2390         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2391     }
2392     if (broken_f & 1) {
2393         i_bgetc = i_getc; i_getc = broken_getc;
2394         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2395     }
    /*
     * An explicitly requested input code is forced with -TRUE; with no
     * request, e_iconv is installed with the established flag cleared.
     */
2396     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2397         set_iconv(-TRUE, e_iconv);
2398     } else if (input_f == SJIS_INPUT) {
2399         set_iconv(-TRUE, s_iconv);
2400 #ifdef UTF8_INPUT_ENABLE
2401     } else if (input_f == UTF8_INPUT) {
2402         set_iconv(-TRUE, w_iconv);
2403     } else if (input_f == UTF16_INPUT) {
2404         set_iconv(-TRUE, w_iconv16);
2405     } else if (input_f == UTF32_INPUT) {
2406         set_iconv(-TRUE, w_iconv32);
2407 #endif
2408     } else {
2409         set_iconv(FALSE, e_iconv);
2410     }
2411
    /* re-initialise every candidate in input_code_list */
2412     {
2413         struct input_code *p = input_code_list;
2414         while (p->name){
2415             status_reinit(p++);
2416         }
2417     }
2418 }
2419
2420 /*
 * Check and Ignore BOM
 *
 * Reads up to four bytes from f through i_getc and compares them with the
 * byte sequences below.  On a match, and when no input code was requested
 * (input_f unset), the matching converter is installed with
 * set_iconv(TRUE, ...).  The matched bytes are dropped only if iconv then
 * equals that converter; input_endian is set accordingly.
 *
 *   00 00 FE FF   w_iconv32, ENDIAN_BIG
 *   00 00 FF FE   w_iconv32, ENDIAN_2143
 *   EF BB BF      w_iconv
 *   FE FF 00 00   w_iconv32, ENDIAN_3412
 *   FE FF         w_iconv16, ENDIAN_BIG
 *   FF FE 00 00   w_iconv32, ENDIAN_LITTLE
 *   FF FE         w_iconv16, ENDIAN_LITTLE
 *
 * In every other case all bytes read are handed back through i_ungetc in
 * reverse order, so that they are read again in their original order.
 */
2423 void check_bom(FILE *f)
2424 {
2425     int c2;
2426     switch(c2 = (*i_getc)(f)){
2427     case 0x00:
        /* candidates: 00 00 FE FF and 00 00 FF FE */
2428         if((c2 = (*i_getc)(f)) == 0x00){
2429             if((c2 = (*i_getc)(f)) == 0xFE){
2430                 if((c2 = (*i_getc)(f)) == 0xFF){
2431                     if(!input_f){
2432                         set_iconv(TRUE, w_iconv32);
2433                     }
2434                     if (iconv == w_iconv32) {
2435                         input_endian = ENDIAN_BIG;
2436                         return;
2437                     }
2438                     (*i_ungetc)(0xFF,f);
2439                 }else (*i_ungetc)(c2,f);
2440                 (*i_ungetc)(0xFE,f);
2441             }else if(c2 == 0xFF){
2442                 if((c2 = (*i_getc)(f)) == 0xFE){
2443                     if(!input_f){
2444                         set_iconv(TRUE, w_iconv32);
2445                     }
2446                     if (iconv == w_iconv32) {
2447                         input_endian = ENDIAN_2143;
2448                         return;
2449                     }
2450                     (*i_ungetc)(0xFF,f);
2451                 }else (*i_ungetc)(c2,f);
2452                 (*i_ungetc)(0xFF,f);
2453             }else (*i_ungetc)(c2,f);
2454             (*i_ungetc)(0x00,f);
2455         }else (*i_ungetc)(c2,f);
2456         (*i_ungetc)(0x00,f);
2457         break;
2458     case 0xEF:
        /* candidate: EF BB BF */
2459         if((c2 = (*i_getc)(f)) == 0xBB){
2460             if((c2 = (*i_getc)(f)) == 0xBF){
2461                 if(!input_f){
2462                     set_iconv(TRUE, w_iconv);
2463                 }
2464                 if (iconv == w_iconv) {
2465                     return;
2466                 }
2467                 (*i_ungetc)(0xBF,f);
2468             }else (*i_ungetc)(c2,f);
2469             (*i_ungetc)(0xBB,f);
2470         }else (*i_ungetc)(c2,f);
2471         (*i_ungetc)(0xEF,f);
2472         break;
2473     case 0xFE:
        /* candidates: FE FF 00 00 first, then plain FE FF */
2474         if((c2 = (*i_getc)(f)) == 0xFF){
2475             if((c2 = (*i_getc)(f)) == 0x00){
2476                 if((c2 = (*i_getc)(f)) == 0x00){
2477                     if(!input_f){
2478                         set_iconv(TRUE, w_iconv32);
2479                     }
2480                     if (iconv == w_iconv32) {
2481                         input_endian = ENDIAN_3412;
2482                         return;
2483                     }
2484                     (*i_ungetc)(0x00,f);
2485                 }else (*i_ungetc)(c2,f);
2486                 (*i_ungetc)(0x00,f);
2487             }else (*i_ungetc)(c2,f);
            /* the bytes after FE FF are pushed back; try the two-byte form */
2488             if(!input_f){
2489                 set_iconv(TRUE, w_iconv16);
2490             }
2491             if (iconv == w_iconv16) {
2492                 input_endian = ENDIAN_BIG;
2493                 return;
2494             }
2495             (*i_ungetc)(0xFF,f);
2496         }else (*i_ungetc)(c2,f);
2497         (*i_ungetc)(0xFE,f);
2498         break;
2499     case 0xFF:
        /* candidates: FF FE 00 00 first, then plain FF FE */
2500         if((c2 = (*i_getc)(f)) == 0xFE){
2501             if((c2 = (*i_getc)(f)) == 0x00){
2502                 if((c2 = (*i_getc)(f)) == 0x00){
2503                     if(!input_f){
2504                         set_iconv(TRUE, w_iconv32);
2505                     }
2506                     if (iconv == w_iconv32) {
2507                         input_endian = ENDIAN_LITTLE;
2508                         return;
2509                     }
2510                     (*i_ungetc)(0x00,f);
2511                 }else (*i_ungetc)(c2,f);
2512                 (*i_ungetc)(0x00,f);
2513             }else (*i_ungetc)(c2,f);
            /* the bytes after FF FE are pushed back; try the two-byte form */
2514             if(!input_f){
2515                 set_iconv(TRUE, w_iconv16);
2516             }
2517             if (iconv == w_iconv16) {
2518                 input_endian = ENDIAN_LITTLE;
2519                 return;
2520             }
2521             (*i_ungetc)(0xFE,f);
2522         }else (*i_ungetc)(c2,f);
2523         (*i_ungetc)(0xFF,f);
2524         break;
2525     default:
        /* not the start of any recognised sequence: hand the byte back */
2526         (*i_ungetc)(c2,f);
2527         break;
2528     }
2529 }
2530
2531 /*
2532    Conversion main loop. Code detection only.
2533  */
2534
/*
 * kanji_convert: main read/convert loop for one input stream.
 *
 * Reads characters from f through (*i_getc) until EOF and, for each one,
 * decides whether it is a first byte, a second byte, part of an escape
 * sequence, a shift-in/shift-out control, or the start of a MIME
 * encoded-word.  Completed characters are handed to (*iconv) while
 * input_mode is ASCII (EUC / SJIS / UTF-8 / UTF-16 input) or directly to
 * (*oconv) in the JIS modes selected by escape sequences.
 *
 * Inside the loop three local macros steer control flow:
 *   NEXT - `continue`: nothing to output yet, read the next character
 *   SEND - empty statement: fall through to the "send" switch below
 *   LAST - `break`: leave the loop and run the epilogue
 *
 * Before looping it resets input_mode / output_mode / shift_mode and calls
 * module_connection() and check_bom(f).
 *
 * Returns 1 after the epilogue.  The only other return (0) sits in the
 * CP932 UDC branch of the send switch.
 */
2535 nkf_char kanji_convert(FILE *f)
2536 {
2537     nkf_char    c3, c2=0, c1, c0=0;
2538     int is_8bit = FALSE;
2539
         /* an explicitly selected 8-bit input code disables SI/SO/ESC handling below
            (unless we are inside a MIME encoded-word) */
2540     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2541 #ifdef UTF8_INPUT_ENABLE
2542        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2543 #endif
2544       ){
2545         is_8bit = TRUE;
2546     }
2547
2548     input_mode = ASCII;
2549     output_mode = ASCII;
2550     shift_mode = FALSE;
2551
2552 #define NEXT continue      /* no output, get next */
2553 #define SEND ;             /* output c1 and c2, get next */
2554 #define LAST break         /* end of loop, go closing  */
2555
2556     module_connection();
2557     check_bom(f);
2558
         /* c2 != 0 means a first byte is pending and c1 is its second byte */
2559     while ((c1 = (*i_getc)(f)) != EOF) {
2560 #ifdef INPUT_CODE_FIX
2561         if (!input_f)
2562 #endif
2563             code_status(c1);
2564         if (c2) {
2565             /* second byte */
2566             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2567                 /* in case of 8th bit is on */
2568                 if (!estab_f&&!mime_decode_mode) {
2569                     /* in case of not established yet */
2570                     /* It is still ambiguous */
2571                     if (h_conv(f, c2, c1)==EOF)
2572                         LAST;
2573                     else
2574                         c2 = 0;
2575                     NEXT;
2576                 } else {
2577                     /* in case of already established */
2578                     if (c1 < AT) {
2579                         /* ignore bogus code and not CP5022x UCD */
2580                         c2 = 0;
2581                         NEXT;
2582                     } else {
2583                         SEND;
2584                     }
2585                 }
2586             } else
2587                 /* second byte, 7 bit code */
2588                 /* it might be kanji shifted */
2589                 if ((c1 == DEL) || (c1 <= SP)) {
2590                     /* ignore bogus first code */
2591                     c2 = 0;
2592                     NEXT;
2593                 } else
2594                     SEND;
2595         } else {
2596             /* first byte */
2597 #ifdef UTF8_INPUT_ENABLE
                 /* UTF-16: read the second byte of the 16-bit unit; when the high byte
                    (c2) is 0xD8..0xDB read one more 16-bit unit and pack it into c0.
                    Any EOF in the middle sets c2 = EOF. */
2598             if (iconv == w_iconv16) {
2599                 if (input_endian == ENDIAN_BIG) {
2600                     c2 = c1;
2601                     if ((c1 = (*i_getc)(f)) != EOF) {
2602                         if (0xD8 <= c2 && c2 <= 0xDB) {
2603                             if ((c0 = (*i_getc)(f)) != EOF) {
2604                                 c0 <<= 8;
2605                                 if ((c3 = (*i_getc)(f)) != EOF) {
2606                                     c0 |= c3;
2607                                 } else c2 = EOF;
2608                             } else c2 = EOF;
2609                         }
2610                     } else c2 = EOF;
2611                 } else {
2612                     if ((c2 = (*i_getc)(f)) != EOF) {
2613                         if (0xD8 <= c2 && c2 <= 0xDB) {
2614                             if ((c3 = (*i_getc)(f)) != EOF) {
2615                                 if ((c0 = (*i_getc)(f)) != EOF) {
2616                                     c0 <<= 8;
2617                                     c0 |= c3;
2618                                 } else c2 = EOF;
2619                             } else c2 = EOF;
2620                         }
2621                     } else c2 = EOF;
2622                 }
2623                 SEND;
2624             } else if(iconv == w_iconv32){
                     /* UTF-32: this c3 shadows the function-level c3 and keeps the first
                        of the four bytes; the bytes are recombined per input_endian into c1 */
2625                 int c3 = c1;
2626                 if((c2 = (*i_getc)(f)) != EOF &&
2627                    (c1 = (*i_getc)(f)) != EOF &&
2628                    (c0 = (*i_getc)(f)) != EOF){
2629                     switch(input_endian){
2630                     case ENDIAN_BIG:
2631                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2632                         break;
2633                     case ENDIAN_LITTLE:
2634                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2635                         break;
2636                     case ENDIAN_2143:
2637                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2638                         break;
2639                     case ENDIAN_3412:
2640                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2641                         break;
2642                     }
2643                     c2 = 0;
2644                 }else{
2645                     c2 = EOF;
2646                 }
2647                 SEND;
2648             } else
2649 #endif
2650 #ifdef NUMCHAR_OPTION
2651             if (is_unicode_capsule(c1)){
2652                 SEND;
2653             } else
2654 #endif
2655             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2656                 /* 8 bit code */
2657                 if (!estab_f && !iso8859_f) {
2658                     /* not established yet */
2659                     c2 = c1;
2660                     NEXT;
2661                 } else { /* estab_f==TRUE */
2662                     if (iso8859_f) {
2663                         c2 = ISO8859_1;
2664                         c1 &= 0x7f;
2665                         SEND;
2666                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2667                         /* SJIS X0201 Case... */
2668                         if (iso2022jp_f && !x0201_f) {
2669                             (*oconv)(GETA1, GETA2);
2670                             NEXT;
2671                         } else {
2672                             c2 = X0201;
2673                             c1 &= 0x7f;
2674                             SEND;
2675                         }
2676                     } else if (c1==SSO && iconv != s_iconv) {
2677                         /* EUC X0201 Case */
2678                         c1 = (*i_getc)(f);  /* skip SSO */
2679                         code_status(c1);
2680                         if (SSP<=c1 && c1<0xe0) {
2681                             if (iso2022jp_f && !x0201_f) {
2682                                 (*oconv)(GETA1, GETA2);
2683                                 NEXT;
2684                             } else {
2685                                 c2 = X0201;
2686                                 c1 &= 0x7f;
2687                                 SEND;
2688                             }
2689                         } else  { /* bogus code, skip SSO and one byte */
2690                             NEXT;
2691                         }
2692                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2693                                (c1 == 0xFD || c1 == 0xFE)) {
2694                         /* CP10001 */
2695                         c2 = X0201;
2696                         c1 &= 0x7f;
2697                         SEND;
2698                     } else {
2699                        /* already established */
2700                        c2 = c1;
2701                        NEXT;
2702                     }
2703                 }
2704             } else if ((c1 > SP) && (c1 != DEL)) {
2705                 /* in case of Roman characters */
2706                 if (shift_mode) {
2707                     /* output 1 shifted byte */
2708                     if (iso8859_f) {
2709                         c2 = ISO8859_1;
2710                         SEND;
2711                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2712                       /* output 1 shifted byte */
2713                         if (iso2022jp_f && !x0201_f) {
2714                             (*oconv)(GETA1, GETA2);
2715                             NEXT;
2716                         } else {
2717                             c2 = X0201;
2718                             SEND;
2719                         }
2720                     } else {
2721                         /* look like bogus code */
2722                         NEXT;
2723                     }
2724                 } else if (input_mode == X0208 || input_mode == X0212 ||
2725                            input_mode == X0213_1 || input_mode == X0213_2) {
2726                     /* in case of Kanji shifted */
2727                     c2 = c1;
2728                     NEXT;
2729                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2730                     /* Check MIME code */
2731                     if ((c1 = (*i_getc)(f)) == EOF) {
2732                         (*oconv)(0, '=');
2733                         LAST;
2734                     } else if (c1 == '?') {
2735                         /* =? is mime conversion start sequence */
2736                         if(mime_f == STRICT_MIME) {
2737                             /* check in real detail */
2738                             if (mime_begin_strict(f) == EOF)
2739                                 LAST;
2740                             else
2741                                 NEXT;
2742                         } else if (mime_begin(f) == EOF)
2743                             LAST;
2744                         else
2745                             NEXT;
2746                     } else {
2747                         (*oconv)(0, '=');
2748                         (*i_ungetc)(c1,f);
2749                         NEXT;
2750                     }
2751                 } else {
2752                     /* normal ASCII code */
2753                     SEND;
2754                 }
2755             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2756                 shift_mode = FALSE;
2757                 NEXT;
2758             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2759                 shift_mode = TRUE;
2760                 NEXT;
2761             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
                     /* escape sequences: switch input_mode / shift_mode, or echo the
                        unrecognised sequence to (*oconv) */
2762                 if ((c1 = (*i_getc)(f)) == EOF) {
2763                     /*  (*oconv)(0, ESC); don't send bogus code */
2764                     LAST;
2765                 } else if (c1 == '$') {
2766                     if ((c1 = (*i_getc)(f)) == EOF) {
2767                         /*
2768                         (*oconv)(0, ESC); don't send bogus code
2769                         (*oconv)(0, '$'); */
2770                         LAST;
2771                     } else if (c1 == '@'|| c1 == 'B') {
2772                         /* This is kanji introduction */
2773                         input_mode = X0208;
2774                         shift_mode = FALSE;
2775                         set_input_codename("ISO-2022-JP");
2776 #ifdef CHECK_OPTION
2777                         debug("ISO-2022-JP");
2778 #endif
2779                         NEXT;
2780                     } else if (c1 == '(') {
2781                         if ((c1 = (*i_getc)(f)) == EOF) {
2782                             /* don't send bogus code
2783                             (*oconv)(0, ESC);
2784                             (*oconv)(0, '$');
2785                             (*oconv)(0, '(');
2786                                 */
2787                             LAST;
2788                         } else if (c1 == '@'|| c1 == 'B') {
2789                             /* This is kanji introduction */
2790                             input_mode = X0208;
2791                             shift_mode = FALSE;
2792                             NEXT;
2793 #ifdef X0212_ENABLE
2794                         } else if (c1 == 'D'){
2795                             input_mode = X0212;
2796                             shift_mode = FALSE;
2797                             NEXT;
2798 #endif /* X0212_ENABLE */
2799                         } else if (c1 == (X0213_1&0x7F)){
2800                             input_mode = X0213_1;
2801                             shift_mode = FALSE;
2802                             NEXT;
2803                         } else if (c1 == (X0213_2&0x7F)){
2804                             input_mode = X0213_2;
2805                             shift_mode = FALSE;
2806                             NEXT;
2807                         } else {
2808                             /* could be some special code */
2809                             (*oconv)(0, ESC);
2810                             (*oconv)(0, '$');
2811                             (*oconv)(0, '(');
2812                             (*oconv)(0, c1);
2813                             NEXT;
2814                         }
2815                     } else if (broken_f&0x2) {
2816                         /* accept any ESC-(-x as broken code ... */
2817                         input_mode = X0208;
2818                         shift_mode = FALSE;
2819                         NEXT;
2820                     } else {
2821                         (*oconv)(0, ESC);
2822                         (*oconv)(0, '$');
2823                         (*oconv)(0, c1);
2824                         NEXT;
2825                     }
2826                 } else if (c1 == '(') {
2827                     if ((c1 = (*i_getc)(f)) == EOF) {
2828                         /* don't send bogus code
2829                         (*oconv)(0, ESC);
2830                         (*oconv)(0, '('); */
2831                         LAST;
2832                     } else {
2833                         if (c1 == 'I') {
2834                             /* This is X0201 kana introduction */
2835                             input_mode = X0201; shift_mode = X0201;
2836                             NEXT;
2837                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2838                             /* ESC ( B / J / H: back to single-byte (ASCII) input mode */
2839                             input_mode = ASCII; shift_mode = FALSE;
2840                             NEXT;
2841                         } else if (broken_f&0x2) {
2842                             input_mode = ASCII; shift_mode = FALSE;
2843                             NEXT;
2844                         } else {
2845                             (*oconv)(0, ESC);
2846                             (*oconv)(0, '(');
2847                             /* maintain various input_mode here */
2848                             SEND;
2849                         }
2850                     }
2851                } else if ( c1 == 'N' || c1 == 'n'){
2852                    /* SS2 */
2853                    c3 = (*i_getc)(f);  /* skip SS2 */
2854                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2855                        c1 = c3;
2856                        c2 = X0201;
2857                        SEND;
2858                    }else{
2859                        (*i_ungetc)(c3, f);
2860                        /* lonely ESC  */
2861                        (*oconv)(0, ESC);
2862                        SEND;
2863                    }
2864                 } else {
2865                     /* lonely ESC  */
2866                     (*oconv)(0, ESC);
2867                     SEND;
2868                 }
2869             } else if (c1 == ESC && iconv == s_iconv) {
2870                 /* ESC in Shift_JIS */
2871                 if ((c1 = (*i_getc)(f)) == EOF) {
2872                     /*  (*oconv)(0, ESC); don't send bogus code */
2873                     LAST;
2874                 } else if (c1 == '$') {
2875                     /* J-PHONE emoji */
2876                     if ((c1 = (*i_getc)(f)) == EOF) {
2877                         /*
2878                            (*oconv)(0, ESC); don't send bogus code
2879                            (*oconv)(0, '$'); */
2880                         LAST;
2881                     } else {
2882                         if (('E' <= c1 && c1 <= 'G') ||
2883                             ('O' <= c1 && c1 <= 'Q')) {
2884                             /*
2885                                NUM : 0 1 2 3 4 5
2886                                BYTE: G E F O P Q
2887                                C%7 : 1 6 0 2 3 4
2888                                C%7 : 0 1 2 3 4 5 6
2889                                NUM : 2 0 3 4 5 X 1
2890                              */
2891                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2892                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2893                             while ((c1 = (*i_getc)(f)) != EOF) {
2894                                 if (SP <= c1 && c1 <= 'z') {
2895                                     (*oconv)(0, c1 + c0);
2896                                 } else break; /* c1 == SO */
2897                             }
2898                         }
2899                     }
2900                     if (c1 == EOF) LAST;
2901                     NEXT;
2902                 } else {
2903                     /* lonely ESC  */
2904                     (*oconv)(0, ESC);
2905                     SEND;
2906                 }
2907             } else if (c1 == LF || c1 == CR) {
2908                 if (broken_f&4) {
2909                     input_mode = ASCII; set_iconv(FALSE, 0);
2910                     SEND;
2911                 } else if (mime_decode_f && !mime_decode_mode){
                         /* a line break followed by SP is swallowed (the SP is pushed back and
                            the break itself is not sent); otherwise the break is sent */
2912                     if (c1 == LF) {
2913                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2914                             i_ungetc(SP,f);
2915                             continue;
2916                         } else {
2917                             i_ungetc(c1,f);
2918                         }
2919                         c1 = LF;
2920                         SEND;
2921                     } else  { /* if (c1 == CR)*/
2922                         if ((c1=(*i_getc)(f))!=EOF) {
2923                             if (c1==SP) {
2924                                 i_ungetc(SP,f);
2925                                 continue;
2926                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2927                                 i_ungetc(SP,f);
2928                                 continue;
2929                             } else {
2930                                 i_ungetc(c1,f);
2931                             }
                                 /* NOTE(review): LF is pushed back here even when the character
                                    after CR was neither LF nor SP, which looks like it turns a
                                    bare CR into CR LF - confirm this is intended */
2932                             i_ungetc(LF,f);
2933                         } else {
2934                             i_ungetc(c1,f);
2935                         }
2936                         c1 = CR;
2937                         SEND;
2938                     }
2939                 }
2940             } else if (c1 == DEL && input_mode == X0208) {
2941                 /* CP5022x */
2942                 c2 = c1;
2943                 NEXT;
2944             } else
2945                 SEND;
2946         }
2947         /* send: */
2948         switch(input_mode){
2949         case ASCII:
                 /* (*iconv) returns -1 / -2 when it needs one / two more bytes; they are
                    read here and the call is repeated with them in the third argument */
2950             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2951             case -2:
2952                 /* 4 bytes UTF-8 */
2953                 if ((c0 = (*i_getc)(f)) != EOF) {
2954                     code_status(c0);
2955                     c0 <<= 8;
2956                     if ((c3 = (*i_getc)(f)) != EOF) {
2957                         code_status(c3);
2958                         (*iconv)(c2, c1, c0|c3);
2959                     }
2960                 }
2961                 break;
2962             case -1:
2963                 /* 3 bytes EUC or UTF-8 */
2964                 if ((c0 = (*i_getc)(f)) != EOF) {
2965                     code_status(c0);
2966                     (*iconv)(c2, c1, c0);
2967                 }
2968                 break;
2969             }
2970             break;
2971         case X0208:
2972         case X0213_1:
2973             if (ms_ucs_map_f &&
2974                 0x7F <= c2 && c2 <= 0x92 &&
2975                 0x21 <= c1 && c1 <= 0x7E) {
2976                 /* CP932 UDC */
                     /* NOTE(review): the enclosing condition already limits c1 to <= 0x7E,
                        so the next test can never be true */
2977                 if(c1 == 0x7F) return 0;
2978                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2979                 c2 = 0;
2980             }
2981             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2982             break;
2983 #ifdef X0212_ENABLE
2984         case X0212:
2985             (*oconv)(PREFIX_EUCG3 | c2, c1);
2986             break;
2987 #endif /* X0212_ENABLE */
2988         case X0213_2:
2989             (*oconv)(PREFIX_EUCG3 | c2, c1);
2990             break;
2991         default:
2992             (*oconv)(input_mode, c1);  /* other special case */
2993         }
2994
2995         c2 = 0;
2996         c0 = 0;
2997         continue;
2998         /* goto next_word */
2999     }
3000
3001     /* epilogue */
         /* signal end of input to the converter; if no input code name was recorded
            and an 8-bit input code was forced, record the candidate in
            input_code_list with the lowest score */
3002     (*iconv)(EOF, 0, 0);
3003     if (!input_codename)
3004     {
3005         if (is_8bit) {
3006             struct input_code *p = input_code_list;
3007             struct input_code *result = p;
3008             while (p->name){
3009                 if (p->score < result->score) result = p;
3010                 ++p;
3011             }
3012             set_input_codename(result->name);
3013 #ifdef CHECK_OPTION
3014             debug(result->name);
3015 #endif
3016         }
3017     }
3018     return 1;
3019 }
3020
3021 nkf_char
3022 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3023 {
3024     nkf_char ret, c3, c0;
3025     int hold_index;
3026
3027
3028     /** it must NOT be in the kanji shifte sequence      */
3029     /** it must NOT be written in JIS7                   */
3030     /** and it must be after 2 byte 8bit code            */
3031
3032     hold_count = 0;
3033     push_hold_buf(c2);
3034     push_hold_buf(c1);
3035
3036     while ((c1 = (*i_getc)(f)) != EOF) {
3037         if (c1 == ESC){
3038             (*i_ungetc)(c1,f);
3039             break;
3040         }
3041         code_status(c1);
3042         if (push_hold_buf(c1) == EOF || estab_f){
3043             break;
3044         }
3045     }
3046
3047     if (!estab_f){
3048         struct input_code *p = input_code_list;
3049         struct input_code *result = p;
3050         if (c1 == EOF){
3051             code_status(c1);
3052         }
3053         while (p->name){
3054             if (p->status_func && p->score < result->score){
3055                 result = p;
3056             }
3057             ++p;
3058         }
3059         set_iconv(TRUE, result->iconv_func);
3060     }
3061
3062
3063     /** now,
3064      ** 1) EOF is detected, or
3065      ** 2) Code is established, or
3066      ** 3) Buffer is FULL (but last word is pushed)
3067      **
3068      ** in 1) and 3) cases, we continue to use
3069      ** Kanji codes by oconv and leave estab_f unchanged.
3070      **/
3071
3072     ret = c1;
3073     hold_index = 0;
3074     while (hold_index < hold_count){
3075         c2 = hold_buf[hold_index++];
3076         if (c2 <= DEL
3077 #ifdef NUMCHAR_OPTION
3078             || is_unicode_capsule(c2)
3079 #endif
3080             ){
3081             (*iconv)(0, c2, 0);
3082             continue;
3083         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3084             (*iconv)(X0201, c2, 0);
3085             continue;
3086         }
3087         if (hold_index < hold_count){
3088             c1 = hold_buf[hold_index++];
3089         }else{
3090             c1 = (*i_getc)(f);
3091             if (c1 == EOF){
3092                 c3 = EOF;
3093                 break;
3094             }
3095             code_status(c1);
3096         }
3097         c0 = 0;
3098         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3099         case -2:
3100             /* 4 bytes UTF-8 */
3101             if (hold_index < hold_count){
3102                 c0 = hold_buf[hold_index++];
3103             } else if ((c0 = (*i_getc)(f)) == EOF) {
3104                 ret = EOF;
3105                 break;
3106             } else {
3107                 code_status(c0);
3108                 c0 <<= 8;
3109                 if (hold_index < hold_count){
3110                     c3 = hold_buf[hold_index++];
3111                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3112                     c0 = ret = EOF;
3113                     break;
3114                 } else {
3115                     code_status(c3);
3116                     (*iconv)(c2, c1, c0|c3);
3117                 }
3118             }
3119             break;
3120         case -1:
3121             /* 3 bytes EUC or UTF-8 */
3122             if (hold_index < hold_count){
3123                 c0 = hold_buf[hold_index++];
3124             } else if ((c0 = (*i_getc)(f)) == EOF) {
3125                 ret = EOF;
3126                 break;
3127             } else {
3128                 code_status(c0);
3129             }
3130             (*iconv)(c2, c1, c0);
3131             break;
3132         }
3133         if (c0 == EOF) break;
3134     }
3135     return ret;
3136 }
3137
3138 nkf_char push_hold_buf(nkf_char c2)
3139 {
3140     if (hold_count >= HOLD_SIZE*2)
3141         return (EOF);
3142     hold_buf[hold_count++] = (unsigned char)c2;
3143     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3144 }
3145
/*
 * s2e_conv: convert one Shift_JIS double-byte character (c2 = first byte,
 * c1 = second byte) to the internal two-byte form.
 *
 * The result is stored through p2 / p1; either pointer may be NULL, in
 * which case that half is not stored.  Every return path returns 0.
 *
 * Steps, in order:
 *   1. (SHIFTJIS_CP932) remap through shiftjis_cp932[] or cp932inv[]
 *      depending on cp932inv_f, when the table holds a non-zero entry.
 *   2. (X0212_ENABLE) when x0213_f is off and c2 is an IBM extension row,
 *      look the pair up in shiftjis_x0212[]; a non-zero entry is returned
 *      immediately (tagged with PREFIX_EUCG3 when it is above 0x7FFF).
 *   3. For c2 >= 0x80, compute the row/cell arithmetically; with x0213_f
 *      and c2 >= 0xF0 the row is tagged with PREFIX_EUCG3.
 *
 * NOTE(review): every table lookup indexes with (c1 - 0x40) without
 * checking c1; the table dimensions are not visible here - confirm that
 * callers never pass c1 outside the range the tables cover.
 */
3146 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3147 {
3148 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3149     nkf_char val;
3150 #endif
         /* indexed by [c2 - 0xF0][second byte above 0x9E ? 1 : 0] in the x0213 branch below */
3151     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3152 #ifdef SHIFTJIS_CP932
3153     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3154         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3155         if (val){
3156             c2 = val >> 8;
3157             c1 = val & 0xff;
3158         }
3159     }
3160     if (cp932inv_f
3161         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3162         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3163         if (c){
3164             c2 = c >> 8;
3165             c1 = c & 0xff;
3166         }
3167     }
3168 #endif /* SHIFTJIS_CP932 */
3169 #ifdef X0212_ENABLE
3170     if (!x0213_f && is_ibmext_in_sjis(c2)){
3171         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3172         if (val){
3173             if (val > 0x7FFF){
3174                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3175                 c1 = val & 0xff;
3176             }else{
3177                 c2 = val >> 8;
3178                 c1 = val & 0xff;
3179             }
3180             if (p2) *p2 = c2;
3181             if (p1) *p1 = c1;
3182             return 0;
3183         }
3184     }
3185 #endif
3186     if(c2 >= 0x80){
3187         if(x0213_f && c2 >= 0xF0){
3188             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3189                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3190             }else{ /* 78<=k<=94 */
3191                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3192                 if (0x9E < c1) c2++;
3193             }
3194         }else{
                 /* each Shift_JIS first byte covers two rows; a second byte above 0x9E
                    selects the second one */
3195             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3196             if (0x9E < c1) c2++;
3197         }
             /* map the second byte to the cell number, skipping the gap at DEL */
3198         if (c1 < 0x9F)
3199             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3200         else {
3201             c1 = c1 - 0x7E;
3202         }
3203     }
3204
3205 #ifdef X0212_ENABLE
3206     c2 = x0212_unshift(c2);
3207 #endif
3208     if (p2) *p2 = c2;
3209     if (p1) *p1 = c1;
3210     return 0;
3211 }
3212
3213 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3214 {
3215     if (c2 == X0201) {
3216         c1 &= 0x7f;
3217     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3218         /* NOP */
3219     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3220         /* CP932 UDC */
3221         if(c1 == 0x7F) return 0;
3222         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3223         c2 = 0;
3224     } else {
3225         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3226         if (ret) return ret;
3227     }
3228     (*oconv)(c2, c1);
3229     return 0;
3230 }
3231
/*
 * e_iconv: input converter for EUC-JP.
 *
 * c2 / c1 are the first and second byte; c0 is the third byte of a
 * three-byte sequence introduced by 0x8f (only with X0212_ENABLE).
 * The converted pair is handed to (*oconv).
 *
 * Returns -1 when c2 is 0x8f and no third byte has been supplied yet
 * (c0 == 0), so that the caller reads one more byte and calls again;
 * returns 0 otherwise.
 *
 * Branches, in order:
 *   - X0201 marker: clear the 8th bit of c1.
 *   - 0x8f (three bytes): either map rows 0xF5..0xFE into the Unicode
 *     private use area (starting at 0xE3AC), or build (c2 << 8) | row with
 *     the cell taken from c0; with cp51932_f the result is round-tripped
 *     through e2s_conv() / s2e_conv().
 *   - SSO: becomes X0201 with the 8th bit of c1 cleared.
 *   - EOF, 0 or anything below SP: passed through untouched.
 *   - two bytes: either map rows 0xF5..0xFE into the private use area
 *     (starting at 0xE000), or strip the 8th bit of both bytes; with
 *     cp51932_f rows 0x79..0x7c are round-tripped through Shift_JIS.
 */
3232 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3233 {
3234     if (c2 == X0201) {
3235         c1 &= 0x7f;
3236 #ifdef X0212_ENABLE
3237     }else if (c2 == 0x8f){
3238         if (c0 == 0){
                 /* third byte not read yet: ask the caller for it */
3239             return -1;
3240         }
3241         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3242             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3243             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3244             c2 = 0;
3245         } else {
3246             c2 = (c2 << 8) | (c1 & 0x7f);
3247             c1 = c0 & 0x7f;
3248 #ifdef SHIFTJIS_CP932
3249             if (cp51932_f){
3250                 nkf_char s2, s1;
3251                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3252                     s2e_conv(s2, s1, &c2, &c1);
3253                     if (c2 < 0x100){
3254                         c1 &= 0x7f;
3255                         c2 &= 0x7f;
3256                     }
3257                 }
3258             }
3259 #endif /* SHIFTJIS_CP932 */
3260         }
3261 #endif /* X0212_ENABLE */
3262     } else if (c2 == SSO){
3263         c2 = X0201;
3264         c1 &= 0x7f;
3265     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3266         /* NOP */
3267     } else {
             /* NOTE(review): this branch tests ms_ucs_map_f while the three-byte
                branch above tests !x0213_f for the same kind of mapping - confirm
                the difference is intended */
3268         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3269             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3270             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3271             c2 = 0;
3272         } else {
3273             c1 &= 0x7f;
3274             c2 &= 0x7f;
3275 #ifdef SHIFTJIS_CP932
3276             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3277                 nkf_char s2, s1;
3278                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3279                     s2e_conv(s2, s1, &c2, &c1);
3280                     if (c2 < 0x100){
3281                         c1 &= 0x7f;
3282                         c2 &= 0x7f;
3283                     }
3284                 }
3285             }
3286 #endif /* SHIFTJIS_CP932 */
3287         }
3288     }
3289     (*oconv)(c2, c1);
3290     return 0;
3291 }
3292
3293 #ifdef UTF8_INPUT_ENABLE
3294 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3295 {
3296     nkf_char ret = 0;
3297
3298     if (!c1){
3299         *p2 = 0;
3300         *p1 = c2;
3301     }else if (0xc0 <= c2 && c2 <= 0xef) {
3302         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3303 #ifdef NUMCHAR_OPTION
3304         if (ret > 0){
3305             if (p2) *p2 = 0;
3306             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3307             ret = 0;
3308         }
3309 #endif
3310     }
3311     return ret;
3312 }
3313
3314 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3315 {
3316     nkf_char ret = 0;
3317     static const char w_iconv_utf8_1st_byte[] =
3318     { /* 0xC0 - 0xFF */
3319         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3320         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3321         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3322         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3323
3324     if (c2 < 0 || 0xff < c2) {
3325     }else if (c2 == 0) { /* 0 : 1 byte*/
3326         c0 = 0;
3327     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3328         return 0;
3329     } else{
3330         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3331         case 21:
3332             if (c1 < 0x80 || 0xBF < c1) return 0;
3333             break;
3334         case 30:
3335             if (c0 == 0) return -1;
3336             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3337                 return 0;
3338             break;
3339         case 31:
3340         case 33:
3341             if (c0 == 0) return -1;
3342             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3343                 return 0;
3344             break;
3345         case 32:
3346             if (c0 == 0) return -1;
3347             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3348                 return 0;
3349             break;
3350         case 40:
3351             if (c0 == 0) return -2;
3352             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3353                 return 0;
3354             break;
3355         case 41:
3356             if (c0 == 0) return -2;
3357             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3358                 return 0;
3359             break;
3360         case 42:
3361             if (c0 == 0) return -2;
3362             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3363                 return 0;
3364             break;
3365         default:
3366             return 0;
3367             break;
3368         }
3369     }
3370     if (c2 == 0 || c2 == EOF){
3371     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3372         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3373         c2 = 0;
3374     } else {
3375         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3376     }
3377     if (ret == 0){
3378         (*oconv)(c2, c1);
3379     }
3380     return ret;
3381 }
3382 #endif
3383
3384 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3385 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3386 {
3387     val &= VALUE_MASK;
3388     if (val < 0x80){
3389         *p2 = val;
3390         *p1 = 0;
3391         *p0 = 0;
3392     }else if (val < 0x800){
3393         *p2 = 0xc0 | (val >> 6);
3394         *p1 = 0x80 | (val & 0x3f);
3395         *p0 = 0;
3396     } else if (val <= NKF_INT32_C(0xFFFF)) {
3397         *p2 = 0xe0 | (val >> 12);
3398         *p1 = 0x80 | ((val >> 6) & 0x3f);
3399         *p0 = 0x80 | (val        & 0x3f);
3400     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3401         *p2 = 0xe0 |  (val >> 16);
3402         *p1 = 0x80 | ((val >> 12) & 0x3f);
3403         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3404     } else {
3405         *p2 = 0;
3406         *p1 = 0;
3407         *p0 = 0;
3408     }
3409 }
3410 #endif
3411
3412 #ifdef UTF8_INPUT_ENABLE
3413 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3414 {
3415     nkf_char val;
3416     if (c2 >= 0xf8) {
3417         val = -1;
3418     } else if (c2 >= 0xf0){
3419         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3420         val = (c2 & 0x0f) << 18;
3421         val |= (c1 & 0x3f) << 12;
3422         val |= (c0 & 0x3f00) >> 2;
3423         val |= (c0 & 0x3f);
3424     }else if (c2 >= 0xe0){
3425         val = (c2 & 0x0f) << 12;
3426         val |= (c1 & 0x3f) << 6;
3427         val |= (c0 & 0x3f);
3428     }else if (c2 >= 0xc0){
3429         val = (c2 & 0x1f) << 6;
3430         val |= (c1 & 0x3f);
3431     }else{
3432         val = c2;
3433     }
3434     return val;
3435 }
3436
3437 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3438 {
3439     nkf_char c2, c1, c0;
3440     nkf_char ret = 0;
3441     val &= VALUE_MASK;
3442     if (val < 0x80){
3443         *p2 = 0;
3444         *p1 = val;
3445     }else{
3446         w16w_conv(val, &c2, &c1, &c0);
3447         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3448 #ifdef NUMCHAR_OPTION
3449         if (ret > 0){
3450             *p2 = 0;
3451             *p1 = CLASS_UNICODE | val;
3452             ret = 0;
3453         }
3454 #endif
3455     }
3456     return ret;
3457 }
3458 #endif
3459
3460 #ifdef UTF8_INPUT_ENABLE
3461 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3462 {
3463     nkf_char ret = 0;
3464     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3465         (*oconv)(c2, c1);
3466         return 0;
3467     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3468         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3469             return -2;
3470         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3471         c2 = 0;
3472     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3473         /*
3474            return 2;
3475         */
3476         return 1;
3477     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3478     if (ret) return ret;
3479     (*oconv)(c2, c1);
3480     return 0;
3481 }
3482
3483 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3484 {
3485     int ret = 0;
3486
3487     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3488     } else if (is_unicode_bmp(c1)) {
3489         ret = w16e_conv(c1, &c2, &c1);
3490     } else {
3491         c2 = 0;
3492         c1 =  CLASS_UNICODE | c1;
3493     }
3494     if (ret) return ret;
3495     (*oconv)(c2, c1);
3496     return 0;
3497 }
3498
3499 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3500 {
3501     const unsigned short *const *pp;
3502     const unsigned short *const *const *ppp;
3503     static const char no_best_fit_chars_table_C2[] =
3504     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3505         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3506         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3507         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3508     static const char no_best_fit_chars_table_C2_ms[] =
3509     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3510         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3511         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3512         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3513     static const char no_best_fit_chars_table_932_C2[] =
3514     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3515         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3516         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3517         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3518     static const char no_best_fit_chars_table_932_C3[] =
3519     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3520         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3521         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3522         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3523     nkf_char ret = 0;
3524
3525     if(c2 < 0x80){
3526         *p2 = 0;
3527         *p1 = c2;
3528     }else if(c2 < 0xe0){
3529         if(no_best_fit_chars_f){
3530             if(ms_ucs_map_f == UCS_MAP_CP932){
3531                 switch(c2){
3532                 case 0xC2:
3533                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3534                     break;
3535                 case 0xC3:
3536                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3537                     break;
3538                 }
3539             }else if(!cp932inv_f){
3540                 switch(c2){
3541                 case 0xC2:
3542                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3543                     break;
3544                 case 0xC3:
3545                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3546                     break;
3547                 }
3548             }else if(ms_ucs_map_f == UCS_MAP_MS){
3549                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3550             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3551                 switch(c2){
3552                 case 0xC2:
3553                     switch(c1){
3554                     case 0xA2:
3555                     case 0xA3:
3556                     case 0xA5:
3557                     case 0xA6:
3558                     case 0xAC:
3559                     case 0xAF:
3560                     case 0xB8:
3561                         return 1;
3562                     }
3563                     break;
3564                 }
3565             }
3566         }
3567         pp =
3568             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3569             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3570             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3571             utf8_to_euc_2bytes;
3572         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3573     }else if(c0 < 0xF0){
3574         if(no_best_fit_chars_f){
3575             if(ms_ucs_map_f == UCS_MAP_CP932){
3576                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3577             }else if(ms_ucs_map_f == UCS_MAP_MS){
3578                 switch(c2){
3579                 case 0xE2:
3580                     switch(c1){
3581                     case 0x80:
3582                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3583                         break;
3584                     case 0x88:
3585                         if(c0 == 0x92) return 1;
3586                         break;
3587                     }
3588                     break;
3589                 case 0xE3:
3590                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3591                     break;
3592                 }
3593             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3594                 switch(c2){
3595                 case 0xE3:
3596                     switch(c1){
3597                     case 0x82:
3598                             if(c0 == 0x94) return 1;
3599                         break;
3600                     case 0x83:
3601                             if(c0 == 0xBB) return 1;
3602                         break;
3603                     }
3604                     break;
3605                 }
3606             }else{
3607                 switch(c2){
3608                 case 0xE2:
3609                     switch(c1){
3610                     case 0x80:
3611                         if(c0 == 0x95) return 1;
3612                         break;
3613                     case 0x88:
3614                         if(c0 == 0xA5) return 1;
3615                         break;
3616                     }
3617                     break;
3618                 case 0xEF:
3619                     switch(c1){
3620                     case 0xBC:
3621                         if(c0 == 0x8D) return 1;
3622                         break;
3623                     case 0xBD:
3624                         if(c0 == 0x9E && !cp932inv_f) return 1;
3625                         break;
3626                     case 0xBF:
3627                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3628                         break;
3629                     }
3630                     break;
3631                 }
3632             }
3633         }
3634         ppp =
3635             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3636             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3637             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3638             utf8_to_euc_3bytes;
3639         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3640     }else return -1;
3641 #ifdef SHIFTJIS_CP932
3642     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3643         nkf_char s2, s1;
3644         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3645             s2e_conv(s2, s1, p2, p1);
3646         }else{
3647             ret = 1;
3648         }
3649     }
3650 #endif
3651     return ret;
3652 }
3653
3654 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3655 {
3656     nkf_char c2;
3657     const unsigned short *p;
3658     unsigned short val;
3659
3660     if (pp == 0) return 1;
3661
3662     c1 -= 0x80;
3663     if (c1 < 0 || psize <= c1) return 1;
3664     p = pp[c1];
3665     if (p == 0)  return 1;
3666
3667     c0 -= 0x80;
3668     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3669     val = p[c0];
3670     if (val == 0) return 1;
3671     if (no_cp932ext_f && (
3672         (val>>8) == 0x2D || /* NEC special characters */
3673         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3674         )) return 1;
3675
3676     c2 = val >> 8;
3677    if (val > 0x7FFF){
3678         c2 &= 0x7f;
3679         c2 |= PREFIX_EUCG3;
3680     }
3681     if (c2 == SO) c2 = X0201;
3682     c1 = val & 0x7f;
3683     if (p2) *p2 = c2;
3684     if (p1) *p1 = c1;
3685     return 0;
3686 }
3687
3688 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3689 {
3690     int shift = 20;
3691     c &= VALUE_MASK;
3692     while(shift >= 0){
3693         if(c >= 1<<shift){
3694             while(shift >= 0){
3695                 (*f)(0, bin2hex(c>>shift));
3696                 shift -= 4;
3697             }
3698         }else{
3699             shift -= 4;
3700         }
3701     }
3702     return;
3703 }
3704
3705 void encode_fallback_html(nkf_char c)
3706 {
3707     (*oconv)(0, '&');
3708     (*oconv)(0, '#');
3709     c &= VALUE_MASK;
3710     if(c >= NKF_INT32_C(1000000))
3711         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3712     if(c >= NKF_INT32_C(100000))
3713         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3714     if(c >= 10000)
3715         (*oconv)(0, 0x30+(c/10000  )%10);
3716     if(c >= 1000)
3717         (*oconv)(0, 0x30+(c/1000   )%10);
3718     if(c >= 100)
3719         (*oconv)(0, 0x30+(c/100    )%10);
3720     if(c >= 10)
3721         (*oconv)(0, 0x30+(c/10     )%10);
3722     if(c >= 0)
3723         (*oconv)(0, 0x30+ c         %10);
3724     (*oconv)(0, ';');
3725     return;
3726 }
3727
3728 void encode_fallback_xml(nkf_char c)
3729 {
3730     (*oconv)(0, '&');
3731     (*oconv)(0, '#');
3732     (*oconv)(0, 'x');
3733     nkf_each_char_to_hex(oconv, c);
3734     (*oconv)(0, ';');
3735     return;
3736 }
3737
3738 void encode_fallback_java(nkf_char c)
3739 {
3740     (*oconv)(0, '\\');
3741     c &= VALUE_MASK;
3742     if(!is_unicode_bmp(c)){
3743         (*oconv)(0, 'U');
3744         (*oconv)(0, '0');
3745         (*oconv)(0, '0');
3746         (*oconv)(0, bin2hex(c>>20));
3747         (*oconv)(0, bin2hex(c>>16));
3748     }else{
3749         (*oconv)(0, 'u');
3750     }
3751     (*oconv)(0, bin2hex(c>>12));
3752     (*oconv)(0, bin2hex(c>> 8));
3753     (*oconv)(0, bin2hex(c>> 4));
3754     (*oconv)(0, bin2hex(c    ));
3755     return;
3756 }
3757
3758 void encode_fallback_perl(nkf_char c)
3759 {
3760     (*oconv)(0, '\\');
3761     (*oconv)(0, 'x');
3762     (*oconv)(0, '{');
3763     nkf_each_char_to_hex(oconv, c);
3764     (*oconv)(0, '}');
3765     return;
3766 }
3767
3768 void encode_fallback_subchar(nkf_char c)
3769 {
3770     c = unicode_subchar;
3771     (*oconv)((c>>8)&0xFF, c&0xFF);
3772     return;
3773 }
3774 #endif
3775
3776 #ifdef UTF8_OUTPUT_ENABLE
3777 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3778 {
3779     const unsigned short *p;
3780
3781     if (c2 == X0201) {
3782         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3783             switch (c1) {
3784             case 0x20:
3785                 return 0xA0;
3786             case 0x7D:
3787                 return 0xA9;
3788             }
3789         }
3790         p = euc_to_utf8_1byte;
3791 #ifdef X0212_ENABLE
3792     } else if (is_eucg3(c2)){
3793         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3794             return 0xA6;
3795         }
3796         c2 = (c2&0x7f) - 0x21;
3797         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3798             p = x0212_to_utf8_2bytes[c2];
3799         else
3800             return 0;
3801 #endif
3802     } else {
3803         c2 &= 0x7f;
3804         c2 = (c2&0x7f) - 0x21;
3805         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3806             p =
3807                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3808                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3809                 euc_to_utf8_2bytes_ms[c2];
3810         else
3811             return 0;
3812     }
3813     if (!p) return 0;
3814     c1 = (c1 & 0x7f) - 0x21;
3815     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3816         return p[c1];
3817     return 0;
3818 }
3819
3820 void w_oconv(nkf_char c2, nkf_char c1)
3821 {
3822     nkf_char c0;
3823     nkf_char val;
3824
3825     if (output_bom_f) {
3826         output_bom_f = FALSE;
3827         (*o_putc)('\357');
3828         (*o_putc)('\273');
3829         (*o_putc)('\277');
3830     }
3831
3832     if (c2 == EOF) {
3833         (*o_putc)(EOF);
3834         return;
3835     }
3836
3837 #ifdef NUMCHAR_OPTION
3838     if (c2 == 0 && is_unicode_capsule(c1)){
3839         val = c1 & VALUE_MASK;
3840         if (val < 0x80){
3841             (*o_putc)(val);
3842         }else if (val < 0x800){
3843             (*o_putc)(0xC0 | (val >> 6));
3844             (*o_putc)(0x80 | (val & 0x3f));
3845         } else if (val <= NKF_INT32_C(0xFFFF)) {
3846             (*o_putc)(0xE0 | (val >> 12));
3847             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3848             (*o_putc)(0x80 | (val        & 0x3f));
3849         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3850             (*o_putc)(0xF0 | ( val>>18));
3851             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3852             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3853             (*o_putc)(0x80 | ( val      & 0x3f));
3854         }
3855         return;
3856     }
3857 #endif
3858
3859     if (c2 == 0) {
3860         output_mode = ASCII;
3861         (*o_putc)(c1);
3862     } else if (c2 == ISO8859_1) {
3863         output_mode = UTF8;
3864         (*o_putc)(c1 | 0x080);
3865     } else {
3866         output_mode = UTF8;
3867         val = e2w_conv(c2, c1);
3868         if (val){
3869             w16w_conv(val, &c2, &c1, &c0);
3870             (*o_putc)(c2);
3871             if (c1){
3872                 (*o_putc)(c1);
3873                 if (c0) (*o_putc)(c0);
3874             }
3875         }
3876     }
3877 }
3878
3879 void w_oconv16(nkf_char c2, nkf_char c1)
3880 {
3881     if (output_bom_f) {
3882         output_bom_f = FALSE;
3883         if (output_endian == ENDIAN_LITTLE){
3884             (*o_putc)((unsigned char)'\377');
3885             (*o_putc)('\376');
3886         }else{
3887             (*o_putc)('\376');
3888             (*o_putc)((unsigned char)'\377');
3889         }
3890     }
3891
3892     if (c2 == EOF) {
3893         (*o_putc)(EOF);
3894         return;
3895     }
3896
3897     if (c2 == ISO8859_1) {
3898         c2 = 0;
3899         c1 |= 0x80;
3900 #ifdef NUMCHAR_OPTION
3901     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3902         if (is_unicode_bmp(c1)) {
3903             c2 = (c1 >> 8) & 0xff;
3904             c1 &= 0xff;
3905         } else {
3906             c1 &= VALUE_MASK;
3907             if (c1 <= UNICODE_MAX) {
3908                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3909                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3910                 if (output_endian == ENDIAN_LITTLE){
3911                     (*o_putc)(c2 & 0xff);
3912                     (*o_putc)((c2 >> 8) & 0xff);
3913                     (*o_putc)(c1 & 0xff);
3914                     (*o_putc)((c1 >> 8) & 0xff);
3915                 }else{
3916                     (*o_putc)((c2 >> 8) & 0xff);
3917                     (*o_putc)(c2 & 0xff);
3918                     (*o_putc)((c1 >> 8) & 0xff);
3919                     (*o_putc)(c1 & 0xff);
3920                 }
3921             }
3922             return;
3923         }
3924 #endif
3925     } else if (c2) {
3926         nkf_char val = e2w_conv(c2, c1);
3927         c2 = (val >> 8) & 0xff;
3928         c1 = val & 0xff;
3929         if (!val) return;
3930     }
3931     if (output_endian == ENDIAN_LITTLE){
3932         (*o_putc)(c1);
3933         (*o_putc)(c2);
3934     }else{
3935         (*o_putc)(c2);
3936         (*o_putc)(c1);
3937     }
3938 }
3939
3940 void w_oconv32(nkf_char c2, nkf_char c1)
3941 {
3942     if (output_bom_f) {
3943         output_bom_f = FALSE;
3944         if (output_endian == ENDIAN_LITTLE){
3945             (*o_putc)((unsigned char)'\377');
3946             (*o_putc)('\376');
3947             (*o_putc)('\000');
3948             (*o_putc)('\000');
3949         }else{
3950             (*o_putc)('\000');
3951             (*o_putc)('\000');
3952             (*o_putc)('\376');
3953             (*o_putc)((unsigned char)'\377');
3954         }
3955     }
3956
3957     if (c2 == EOF) {
3958         (*o_putc)(EOF);
3959         return;
3960     }
3961
3962     if (c2 == ISO8859_1) {
3963         c1 |= 0x80;
3964 #ifdef NUMCHAR_OPTION
3965     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3966         c1 &= VALUE_MASK;
3967 #endif
3968     } else if (c2) {
3969         c1 = e2w_conv(c2, c1);
3970         if (!c1) return;
3971     }
3972     if (output_endian == ENDIAN_LITTLE){
3973         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3974         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3975         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3976         (*o_putc)('\000');
3977     }else{
3978         (*o_putc)('\000');
3979         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3980         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3981         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3982     }
3983 }
3984 #endif
3985
3986 void e_oconv(nkf_char c2, nkf_char c1)
3987 {
3988 #ifdef NUMCHAR_OPTION
3989     if (c2 == 0 && is_unicode_capsule(c1)){
3990         w16e_conv(c1, &c2, &c1);
3991         if (c2 == 0 && is_unicode_capsule(c1)){
3992             c2 = c1 & VALUE_MASK;
3993             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
3994                 /* eucJP-ms UDC */
3995                 c1 &= 0xFFF;
3996                 c2 = c1 / 94;
3997                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
3998                 c1 = 0x21 + c1 % 94;
3999                 if (is_eucg3(c2)){
4000                     (*o_putc)(0x8f);
4001                     (*o_putc)((c2 & 0x7f) | 0x080);
4002                     (*o_putc)(c1 | 0x080);
4003                 }else{
4004                     (*o_putc)((c2 & 0x7f) | 0x080);
4005                     (*o_putc)(c1 | 0x080);
4006                 }
4007                 return;
4008             } else {
4009                 if (encode_fallback) (*encode_fallback)(c1);
4010                 return;
4011             }
4012         }
4013     }
4014 #endif
4015     if (c2 == EOF) {
4016         (*o_putc)(EOF);
4017         return;
4018     } else if (c2 == 0) {
4019         output_mode = ASCII;
4020         (*o_putc)(c1);
4021     } else if (c2 == X0201) {
4022         output_mode = JAPANESE_EUC;
4023         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4024     } else if (c2 == ISO8859_1) {
4025         output_mode = ISO8859_1;
4026         (*o_putc)(c1 | 0x080);
4027 #ifdef X0212_ENABLE
4028     } else if (is_eucg3(c2)){
4029         output_mode = JAPANESE_EUC;
4030 #ifdef SHIFTJIS_CP932
4031         if (!cp932inv_f){
4032             nkf_char s2, s1;
4033             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4034                 s2e_conv(s2, s1, &c2, &c1);
4035             }
4036         }
4037 #endif
4038         if (c2 == 0) {
4039             output_mode = ASCII;
4040             (*o_putc)(c1);
4041         }else if (is_eucg3(c2)){
4042             if (x0212_f){
4043                 (*o_putc)(0x8f);
4044                 (*o_putc)((c2 & 0x7f) | 0x080);
4045                 (*o_putc)(c1 | 0x080);
4046             }
4047         }else{
4048             (*o_putc)((c2 & 0x7f) | 0x080);
4049             (*o_putc)(c1 | 0x080);
4050         }
4051 #endif
4052     } else {
4053         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4054             set_iconv(FALSE, 0);
4055             return; /* too late to rescue this char */
4056         }
4057         output_mode = JAPANESE_EUC;
4058         (*o_putc)(c2 | 0x080);
4059         (*o_putc)(c1 | 0x080);
4060     }
4061 }
4062
4063 #ifdef X0212_ENABLE
4064 nkf_char x0212_shift(nkf_char c)
4065 {
4066     nkf_char ret = c;
4067     c &= 0x7f;
4068     if (is_eucg3(ret)){
4069         if (0x75 <= c && c <= 0x7f){
4070             ret = c + (0x109 - 0x75);
4071         }
4072     }else{
4073         if (0x75 <= c && c <= 0x7f){
4074             ret = c + (0x113 - 0x75);
4075         }
4076     }
4077     return ret;
4078 }
4079
4080
4081 nkf_char x0212_unshift(nkf_char c)
4082 {
4083     nkf_char ret = c;
4084     if (0x7f <= c && c <= 0x88){
4085         ret = c + (0x75 - 0x7f);
4086     }else if (0x89 <= c && c <= 0x92){
4087         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4088     }
4089     return ret;
4090 }
4091 #endif /* X0212_ENABLE */
4092
4093 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4094 {
4095     nkf_char ndx;
4096     if (is_eucg3(c2)){
4097         ndx = c2 & 0x7f;
4098         if (x0213_f){
4099             if((0x21 <= ndx && ndx <= 0x2F)){
4100                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4101                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4102                 return 0;
4103             }else if(0x6E <= ndx && ndx <= 0x7E){
4104                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4105                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4106                 return 0;
4107             }
4108             return 1;
4109         }
4110 #ifdef X0212_ENABLE
4111         else if(nkf_isgraph(ndx)){
4112             nkf_char val = 0;
4113             const unsigned short *ptr;
4114             ptr = x0212_shiftjis[ndx - 0x21];
4115             if (ptr){
4116                 val = ptr[(c1 & 0x7f) - 0x21];
4117             }
4118             if (val){
4119                 c2 = val >> 8;
4120                 c1 = val & 0xff;
4121                 if (p2) *p2 = c2;
4122                 if (p1) *p1 = c1;
4123                 return 0;
4124             }
4125             c2 = x0212_shift(c2);
4126         }
4127 #endif /* X0212_ENABLE */
4128     }
4129     if(0x7F < c2) return 1;
4130     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4131     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4132     return 0;
4133 }
4134
4135 void s_oconv(nkf_char c2, nkf_char c1)
4136 {
4137 #ifdef NUMCHAR_OPTION
4138     if (c2 == 0 && is_unicode_capsule(c1)){
4139         w16e_conv(c1, &c2, &c1);
4140         if (c2 == 0 && is_unicode_capsule(c1)){
4141             c2 = c1 & VALUE_MASK;
4142             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4143                 /* CP932 UDC */
4144                 c1 &= 0xFFF;
4145                 c2 = c1 / 188 + 0xF0;
4146                 c1 = c1 % 188;
4147                 c1 += 0x40 + (c1 > 0x3e);
4148                 (*o_putc)(c2);
4149                 (*o_putc)(c1);
4150                 return;
4151             } else {
4152                 if(encode_fallback)(*encode_fallback)(c1);
4153                 return;
4154             }
4155         }
4156     }
4157 #endif
4158     if (c2 == EOF) {
4159         (*o_putc)(EOF);
4160         return;
4161     } else if (c2 == 0) {
4162         output_mode = ASCII;
4163         (*o_putc)(c1);
4164     } else if (c2 == X0201) {
4165         output_mode = SHIFT_JIS;
4166         (*o_putc)(c1|0x80);
4167     } else if (c2 == ISO8859_1) {
4168         output_mode = ISO8859_1;
4169         (*o_putc)(c1 | 0x080);
4170 #ifdef X0212_ENABLE
4171     } else if (is_eucg3(c2)){
4172         output_mode = SHIFT_JIS;
4173         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4174             (*o_putc)(c2);
4175             (*o_putc)(c1);
4176         }
4177 #endif
4178     } else {
4179         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4180             set_iconv(FALSE, 0);
4181             return; /* too late to rescue this char */
4182         }
4183         output_mode = SHIFT_JIS;
4184         e2s_conv(c2, c1, &c2, &c1);
4185
4186 #ifdef SHIFTJIS_CP932
4187         if (cp932inv_f
4188             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4189             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4190             if (c){
4191                 c2 = c >> 8;
4192                 c1 = c & 0xff;
4193             }
4194         }
4195 #endif /* SHIFTJIS_CP932 */
4196
4197         (*o_putc)(c2);
4198         if (prefix_table[(unsigned char)c1]){
4199             (*o_putc)(prefix_table[(unsigned char)c1]);
4200         }
4201         (*o_putc)(c1);
4202     }
4203 }
4204
4205 void j_oconv(nkf_char c2, nkf_char c1)
4206 {
4207 #ifdef NUMCHAR_OPTION
4208     if (c2 == 0 && is_unicode_capsule(c1)){
4209         w16e_conv(c1, &c2, &c1);
4210         if (c2 == 0 && is_unicode_capsule(c1)){
4211             c2 = c1 & VALUE_MASK;
4212             if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
4213                 /* CP5022x UDC */
4214                 c1 &= 0xFFF;
4215                 c2 = 0x7F + c1 / 94;
4216                 c1 = 0x21 + c1 % 94;
4217             } else {
4218                 if (encode_fallback) (*encode_fallback)(c1);
4219                 return;
4220             }
4221