OSDN Git Service

* Remove variable is_inputcode_mixed.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.139 2007/10/01 21:52:14 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-10-02"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
82 void  djgpp_setbinmode(FILE *fp)
83 {
84     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
88     __file_handle_set(fd, m);
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
107 #ifdef OVERWRITE
108 /* added by satoru@isoternet.org */
109 #if defined(__EMX__)
110 #include <sys/types.h>
111 #endif
112 #include <sys/stat.h>
113 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
114 #include <unistd.h>
115 #if defined(__WATCOMC__)
116 #include <sys/utime.h>
117 #else
118 #include <utime.h>
119 #endif
120 #else /* defined(MSDOS) */
121 #ifdef __WIN32__
122 #ifdef __BORLANDC__ /* BCC32 */
123 #include <utime.h>
124 #else /* !defined(__BORLANDC__) */
125 #include <sys/utime.h>
126 #endif /* (__BORLANDC__) */
127 #else /* !defined(__WIN32__) */
128 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
129 #include <sys/utime.h>
130 #elif defined(__TURBOC__) /* BCC */
131 #include <utime.h>
132 #elif defined(LSI_C) /* LSI C */
133 #endif /* (__WIN32__) */
134 #endif
135 #endif
136 #endif
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
190 /* ASCII CODE */
191
192 #define         BS      0x08
193 #define         TAB     0x09
194 #define         LF      0x0a
195 #define         CR      0x0d
196 #define         ESC     0x1b
197 #define         SP      0x20
198 #define         AT      0x40
199 #define         SSP     0xa0
200 #define         DEL     0x7f
201 #define         SI      0x0f
202 #define         SO      0x0e
203 #define         SSO     0x8e
204 #define         SS3     0x8f
205 #define         CRLF    0x0D0A
206
/*
 * Locale-independent ASCII character classification and hex helpers.
 *
 * Every use of a macro parameter is parenthesised so that an argument
 * containing operators (e.g. `a | b`, `x ? y : z`) is classified as a
 * whole instead of being re-associated with the comparison operators
 * inside the macro.  The arguments are still evaluated more than once,
 * so do not pass expressions with side effects (`*p++`, `getc(f)`).
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for anything that is not one. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hex digit for the low 4 bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the high byte of the 16-bit value c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
226
/*
 * Inclusive byte ranges used by the CP932 handling.
 * NOTE(review): presumably these bound the Shift_JIS lead bytes covered by
 * the CP932 / CP932-inverse tables -- confirm against the table users.
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * c2 is parenthesised so operator expressions are tested as a whole;
 * it is evaluated twice, so avoid arguments with side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
232
233 #define         HOLD_SIZE       1024
234 #if defined(INT_IS_SHORT)
235 #define         IOBUF_SIZE      2048
236 #else
237 #define         IOBUF_SIZE      16384
238 #endif
239
240 #define         DEFAULT_J       'B'
241 #define         DEFAULT_R       'B'
242
243 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
244 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
245
246 #define         RANGE_NUM_MAX   18
247 #define         GETA1   0x22
248 #define         GETA2   0x2e
249
250
251 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
252 #define sizeof_euc_to_utf8_1byte 94
253 #define sizeof_euc_to_utf8_2bytes 94
254 #define sizeof_utf8_to_euc_C2 64
255 #define sizeof_utf8_to_euc_E5B8 64
256 #define sizeof_utf8_to_euc_2bytes 112
257 #define sizeof_utf8_to_euc_3bytes 16
258 #endif
259
260 /* MIME preprocessor */
261
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
264 #endif
265
266 struct input_code{
267     char *name;
268     nkf_char stat;
269     nkf_char score;
270     nkf_char index;
271     nkf_char buf[3];
272     void (*status_func)(struct input_code *, nkf_char);
273     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
274     int _file_stat;
275 };
276
277 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
278
279 #ifndef PERL_XS
280 static const char *CopyRight = COPY_RIGHT;
281 #endif
282 #if !defined(PERL_XS) && !defined(WIN32DLL)
283 static  nkf_char     noconvert(FILE *f);
284 #endif
285 static  void    module_connection(void);
286 static  nkf_char     kanji_convert(FILE *f);
287 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
288 static  nkf_char     push_hold_buf(nkf_char c2);
289 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
290 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
291 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
292 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
293 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
294 /* UCS Mapping
295  * 0: Shift_JIS, eucJP-ascii
296  * 1: eucJP-ms
297  * 2: CP932, CP51932
298  * 3: CP10001
299  */
300 #define UCS_MAP_ASCII   0
301 #define UCS_MAP_MS      1
302 #define UCS_MAP_CP932   2
303 #define UCS_MAP_CP10001 3
304 static int ms_ucs_map_f = UCS_MAP_ASCII;
305 #endif
306 #ifdef UTF8_INPUT_ENABLE
307 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
308 static  int     no_cp932ext_f = FALSE;
309 /* ignore ZERO WIDTH NO-BREAK SPACE */
310 static  int     no_best_fit_chars_f = FALSE;
311 static  int     input_endian = ENDIAN_BIG;
312 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
313 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
314 static  void    encode_fallback_html(nkf_char c);
315 static  void    encode_fallback_xml(nkf_char c);
316 static  void    encode_fallback_java(nkf_char c);
317 static  void    encode_fallback_perl(nkf_char c);
318 static  void    encode_fallback_subchar(nkf_char c);
319 static  void    (*encode_fallback)(nkf_char c) = NULL;
320 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
321 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
322 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
323 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
324 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
325 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
326 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
327 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
328 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
329 static  void    w_status(struct input_code *, nkf_char);
330 #endif
331 #ifdef UTF8_OUTPUT_ENABLE
332 static  int     output_bom_f = FALSE;
333 static  int     output_endian = ENDIAN_BIG;
334 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
335 static  void    w_oconv(nkf_char c2,nkf_char c1);
336 static  void    w_oconv16(nkf_char c2,nkf_char c1);
337 static  void    w_oconv32(nkf_char c2,nkf_char c1);
338 #endif
339 static  void    e_oconv(nkf_char c2,nkf_char c1);
340 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  void    s_oconv(nkf_char c2,nkf_char c1);
342 static  void    j_oconv(nkf_char c2,nkf_char c1);
343 static  void    fold_conv(nkf_char c2,nkf_char c1);
344 static  void    nl_conv(nkf_char c2,nkf_char c1);
345 static  void    z_conv(nkf_char c2,nkf_char c1);
346 static  void    rot_conv(nkf_char c2,nkf_char c1);
347 static  void    hira_conv(nkf_char c2,nkf_char c1);
348 static  void    base64_conv(nkf_char c2,nkf_char c1);
349 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
350 static  void    no_connection(nkf_char c2,nkf_char c1);
351 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
352
353 static  void    code_score(struct input_code *ptr);
354 static  void    code_status(nkf_char c);
355
356 static  void    std_putc(nkf_char c);
357 static  nkf_char     std_getc(FILE *f);
358 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
359
360 static  nkf_char     broken_getc(FILE *f);
361 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     mime_begin(FILE *f);
364 static  nkf_char     mime_getc(FILE *f);
365 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
366
367 static  void    switch_mime_getc(void);
368 static  void    unswitch_mime_getc(void);
369 static  nkf_char     mime_begin_strict(FILE *f);
370 static  nkf_char     mime_getc_buf(FILE *f);
371 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
372 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
373
374 static  nkf_char     base64decode(nkf_char c);
375 static  void    mime_prechar(nkf_char c2, nkf_char c1);
376 static  void    mime_putc(nkf_char c);
377 static  void    open_mime(nkf_char c);
378 static  void    close_mime(void);
379 static  void    eof_mime(void);
380 static  void    mimeout_addchar(nkf_char c);
381 #ifndef PERL_XS
382 static  void    usage(void);
383 static  void    version(void);
384 #endif
385 static  void    options(unsigned char *c);
386 #if defined(PERL_XS) || defined(WIN32DLL)
387 static  void    reinit(void);
388 #endif
389
390 /* buffers */
391
392 #if !defined(PERL_XS) && !defined(WIN32DLL)
393 static unsigned char   stdibuf[IOBUF_SIZE];
394 static unsigned char   stdobuf[IOBUF_SIZE];
395 #endif
396 static unsigned char   hold_buf[HOLD_SIZE*2];
397 static int             hold_count = 0;
398
399 /* MIME preprocessor fifo */
400
401 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
402 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
403 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
404 static unsigned char           mime_buf[MIME_BUF_SIZE];
405 static unsigned int            mime_top = 0;
406 static unsigned int            mime_last = 0;  /* decoded */
407 static unsigned int            mime_input = 0; /* undecoded */
408 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
409
410 /* flags */
/*
 * File-scope option flags.  They are presumably set by options() while
 * main() walks the command line -- the body of options() is not visible
 * in this part of the file.
 */
411 static int             unbuf_f = FALSE;        /* when set, main() leaves stdout unbuffered */
412 static int             estab_f = FALSE;
413 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
414 static int             binmode_f = TRUE;       /* binary mode */
415 static int             rot_f = FALSE;          /* rot14/43 mode; NOTE(review): nkf documents -r as ROT13/47 -- confirm */
416 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
417 static int             input_f = FALSE;        /* non fixed input code  */
418 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
419 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
420 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
421 static int             mimebuf_f = FALSE;      /* MIME buffered input */
422 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
423 static int             iso8859_f = FALSE;      /* ISO8859 through */
424 static int             mimeout_f = FALSE;       /* base64 mode */
/* x0201_f: a value of WISH_TRUE is resolved by main() to TRUE, or to
 * NO_X0201 when iso2022jp_f is set. */
425 #if defined(MSDOS) || defined(__OS2__)
426 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
427 #else
428 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
429 #endif
430 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
431
432 #ifdef UNICODE_NORMALIZATION
433 static int nfc_f = FALSE;
434 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
435 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
436 static nkf_char nfc_getc(FILE *f);
437 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
438 #endif
439
440 #ifdef INPUT_OPTION
441 static int cap_f = FALSE;
442 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
443 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
444 static nkf_char cap_getc(FILE *f);
445 static nkf_char cap_ungetc(nkf_char c,FILE *f);
446
447 static int url_f = FALSE;
448 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
449 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
450 static nkf_char url_getc(FILE *f);
451 static nkf_char url_ungetc(nkf_char c,FILE *f);
452 #endif
453
/*
 * NKF_INT32_C(n): spell an integer constant so that it is at least 32 bits
 * wide; when int is 16 bits (INT_IS_SHORT) an `L` suffix is appended.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
/* The top 8 bits of a value are a class tag, the low 24 bits the payload. */
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The parameter is parenthesised in both macros: `&` binds tighter than
 * `|`, so an argument such as `CLASS_UNICODE | cp` would otherwise have
 * only its right-hand operand masked.
 */
/* True when the class tag of c is CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the 24-bit payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
466
467 #ifdef NUMCHAR_OPTION
468 static int numchar_f = FALSE;
469 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
470 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
471 static nkf_char numchar_getc(FILE *f);
472 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
473 #endif
474
475 #ifdef CHECK_OPTION
476 static int noout_f = FALSE;
477 static void no_putc(nkf_char c);
478 static nkf_char debug_f = FALSE;
479 static void debug(const char *str);
480 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
481 #endif
482
483 static int guess_f = FALSE;
484 #if !defined PERL_XS
485 static  void    print_guessed_code(char *filename);
486 #endif
487 static  void    set_input_codename(char *codename);
488
489 #ifdef EXEC_IO
490 static int exec_f = 0;
491 #endif
492
493 #ifdef SHIFTJIS_CP932
494 /* invert IBM extended characters to others */
495 static int cp51932_f = FALSE;
496
497 /* invert NEC-selected IBM extended characters to IBM extended characters */
498 static int cp932inv_f = TRUE;
499
500 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
501 #endif /* SHIFTJIS_CP932 */
502
503 #ifdef X0212_ENABLE
504 static int x0212_f = FALSE;
505 static nkf_char x0212_shift(nkf_char c);
506 static nkf_char x0212_unshift(nkf_char c);
507 #endif
508 static int x0213_f = FALSE;
509
510 static unsigned char prefix_table[256];
511
512 static void set_code_score(struct input_code *ptr, nkf_char score);
513 static void clr_code_score(struct input_code *ptr, nkf_char score);
514 static void status_disable(struct input_code *ptr);
515 static void status_push_ch(struct input_code *ptr, nkf_char c);
516 static void status_clear(struct input_code *ptr);
517 static void status_reset(struct input_code *ptr);
518 static void status_reinit(struct input_code *ptr);
519 static void status_check(struct input_code *ptr, nkf_char c);
520 static void e_status(struct input_code *, nkf_char);
521 static void s_status(struct input_code *, nkf_char);
522
/*
 * Table of candidate input encodings, one struct input_code per entry:
 * name, zeroed stat/score/index/buf, status callback, input converter,
 * zeroed _file_stat.  The UTF entries exist only when UTF8_INPUT_ENABLE
 * is defined; UTF-16 and UTF-32 have no status callback (NULL).
 * The final all-zero entry ends the table (its name is a null pointer).
 */
523 struct input_code input_code_list[] = {
524     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
525     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
526 #ifdef UTF8_INPUT_ENABLE
527     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
528     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
529     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
530 #endif
531     {0}
532 };
533
534 static int              mimeout_mode = 0;
535 static int              base64_count = 0;
536
537 /* X0208 -> ASCII converter */
538
539 /* fold parameter */
540 static int             f_line = 0;    /* chars in line */
541 static int             f_prev = 0;
542 static int             fold_preserve_f = FALSE; /* preserve new lines */
543 static int             fold_f  = FALSE;
544 static int             fold_len  = 0;
545
546 /* options */
547 static unsigned char   kanji_intro = DEFAULT_J;
548 static unsigned char   ascii_intro = DEFAULT_R;
549
550 /* Folding */
551
552 #define FOLD_MARGIN  10
553 #define DEFAULT_FOLD 60
554
555 static int             fold_margin  = FOLD_MARGIN;
556
557 /* converters */
558
559 #ifdef DEFAULT_CODE_JIS
560 #   define  DEFAULT_CONV j_oconv
561 #endif
562 #ifdef DEFAULT_CODE_SJIS
563 #   define  DEFAULT_CONV s_oconv
564 #endif
565 #ifdef DEFAULT_CODE_EUC
566 #   define  DEFAULT_CONV e_oconv
567 #endif
568 #ifdef DEFAULT_CODE_UTF8
569 #   define  DEFAULT_CONV w_oconv
570 #endif
571
572 /* process default */
573 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
574
575 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
576 /* s_iconv or oconv */
577 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
578
579 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
580 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
586
587 /* static redirections */
588
589 static  void   (*o_putc)(nkf_char c) = std_putc;
590
591 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
592 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
593
594 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
595 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
596
597 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
598
599 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
600 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
601
602 /* for strict mime */
603 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
604 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
605
606 /* Global states */
607 static int output_mode = ASCII,    /* output kanji mode */
608            input_mode =  ASCII,    /* input kanji mode */
609            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
610 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
611
612 /* X0201 / X0208 conversion tables */
613
614 /* X0201 kana conversion table */
615 /* 90-9F A0-DF */
/*
 * Two bytes per entry: 64 entries (16 rows of 4) followed by a closing
 * 0x00,0x00 pair.
 * NOTE(review): the bytes look like JIS X 0208 (first, second) byte pairs
 * for each JIS X 0201 kana -- confirm against the code that indexes cv.
 * NOTE(review): the range comment above says "90-9F A0-DF", but 64 entries
 * match A0-DF alone -- confirm the index base used by the caller.
 */
616 static const unsigned char cv[]= {
617     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
618     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
619     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
620     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
621     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
622     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
623     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
624     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
625     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
626     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
627     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
628     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
629     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
630     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
631     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
632     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
633     0x00,0x00};
634
635
636 /* X0201 kana conversion table for dakuten (voiced sound mark) */
637 /* 90-9F A0-DF */
/*
 * Same layout as cv: two bytes per entry, 64 entries plus a closing
 * 0x00,0x00 pair.
 * NOTE(review): a 0x00,0x00 entry presumably means the kana at that
 * position has no voiced form -- confirm against the code that reads dv.
 */
638 static const unsigned char dv[]= {
639     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
644     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
645     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
646     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
647     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
648     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
649     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
650     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
651     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00};
656
657 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
658 /* 90-9F A0-DF */
/*
 * Same layout as cv and dv: two bytes per entry, 64 entries plus a closing
 * 0x00,0x00 pair.  Only five entries are non-zero.
 * NOTE(review): a 0x00,0x00 entry presumably means the kana at that
 * position has no semi-voiced form -- confirm against the code that
 * reads ev.
 */
659 static const unsigned char ev[]= {
660     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
671     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
672     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00};
677
678
679 /* X0208 kigou conversion table */
680 /* 0x8140 - 0x819e */
/*
 * One byte per entry, 96 entries.  Non-zero entries are ASCII codes
 * (0x2c ',', 0x2e '.', 0x3a ':', ...).
 * NOTE(review): a 0x00 entry presumably means the symbol has no ASCII
 * counterpart -- confirm against the code that reads fv.
 * NOTE(review): the range comment above (0x8140 - 0x819e) spans 95 codes
 * while the table holds 96 bytes -- confirm the index base.
 */
681 static const unsigned char fv[] = {
682
683     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
684     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
685     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
686     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
687     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
688     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
689     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
690     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
691     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
692     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
693     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
694     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
695 } ;
696
697
698
699 static int             file_out_f = FALSE;
700 #ifdef OVERWRITE
701 static int             overwrite_f = FALSE;
702 static int             preserve_time_f = FALSE;
703 static int             backup_f = FALSE;
704 static char            *backup_suffix = "";
705 static char *get_backup_filename(const char *suffix, const char *filename);
706 #endif
707
708 static int nlmode_f = 0;   /* CR, LF, CRLF */
709 static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
710 static nkf_char prev_cr = 0; /* CR or 0 */
711 #ifdef EASYWIN /*Easy Win */
712 static int             end_check;
713 #endif /*Easy Win */
714
715 #define STD_GC_BUFSIZE (256)
716 nkf_char std_gc_buf[STD_GC_BUFSIZE];
717 nkf_char std_gc_ndx;
718
719 #ifdef WIN32DLL
720 #include "nkf32dll.c"
721 #elif defined(PERL_XS)
722 #else /* WIN32DLL */
723 int main(int argc, char **argv)
724 {
725     FILE  *fin;
726     unsigned char  *cp;
727
728     char *outfname = NULL;
729     char *origfname;
730
731 #ifdef EASYWIN /*Easy Win */
732     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
733 #endif
734
735     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
736         cp = (unsigned char *)*argv;
737         options(cp);
738 #ifdef EXEC_IO
739         if (exec_f){
740             int fds[2], pid;
741             if (pipe(fds) < 0 || (pid = fork()) < 0){
742                 abort();
743             }
744             if (pid == 0){
745                 if (exec_f > 0){
746                     close(fds[0]);
747                     dup2(fds[1], 1);
748                 }else{
749                     close(fds[1]);
750                     dup2(fds[0], 0);
751                 }
752                 execvp(argv[1], &argv[1]);
753             }
754             if (exec_f > 0){
755                 close(fds[1]);
756                 dup2(fds[0], 0);
757             }else{
758                 close(fds[0]);
759                 dup2(fds[1], 1);
760             }
761             argc = 0;
762             break;
763         }
764 #endif
765     }
766     if(x0201_f == WISH_TRUE)
767          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
768
769     if (binmode_f == TRUE)
770 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
771     if (freopen("","wb",stdout) == NULL)
772         return (-1);
773 #else
774     setbinmode(stdout);
775 #endif
776
777     if (unbuf_f)
778       setbuf(stdout, (char *) NULL);
779     else
780       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
781
782     if (argc == 0) {
783       if (binmode_f == TRUE)
784 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
785       if (freopen("","rb",stdin) == NULL) return (-1);
786 #else
787       setbinmode(stdin);
788 #endif
789       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
790       if (nop_f)
791           noconvert(stdin);
792       else {
793           kanji_convert(stdin);
794           if (guess_f) print_guessed_code(NULL);
795       }
796     } else {
797       int nfiles = argc;
798         int is_argument_error = FALSE;
799       while (argc--) {
800             input_codename = NULL;
801 #ifdef CHECK_OPTION
802             iconv_for_check = 0;
803 #endif
804           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
805               perror(*--argv);
806                 *argv++;
807                 is_argument_error = TRUE;
808                 continue;
809           } else {
810 #ifdef OVERWRITE
811               int fd = 0;
812               int fd_backup = 0;
813 #endif
814
815 /* reopen file for stdout */
816               if (file_out_f == TRUE) {
817 #ifdef OVERWRITE
818                   if (overwrite_f){
819                       outfname = malloc(strlen(origfname)
820                                         + strlen(".nkftmpXXXXXX")
821                                         + 1);
822                       if (!outfname){
823                           perror(origfname);
824                           return -1;
825                       }
826                       strcpy(outfname, origfname);
827 #ifdef MSDOS
828                       {
829                           int i;
830                           for (i = strlen(outfname); i; --i){
831                               if (outfname[i - 1] == '/'
832                                   || outfname[i - 1] == '\\'){
833                                   break;
834                               }
835                           }
836                           outfname[i] = '\0';
837                       }
838                       strcat(outfname, "ntXXXXXX");
839                       mktemp(outfname);
840                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
841                                 S_IREAD | S_IWRITE);
842 #else
843                       strcat(outfname, ".nkftmpXXXXXX");
844                       fd = mkstemp(outfname);
845 #endif
846                       if (fd < 0
847                           || (fd_backup = dup(fileno(stdout))) < 0
848                           || dup2(fd, fileno(stdout)) < 0
849                           ){
850                           perror(origfname);
851                           return -1;
852                       }
853                   }else
854 #endif
855                   if(argc == 1) {
856                       outfname = *argv++;
857                       argc--;
858                   } else {
859                       outfname = "nkf.out";
860                   }
861
862                   if(freopen(outfname, "w", stdout) == NULL) {
863                       perror (outfname);
864                       return (-1);
865                   }
866                   if (binmode_f == TRUE) {
867 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
868                       if (freopen("","wb",stdout) == NULL)
869                            return (-1);
870 #else
871                       setbinmode(stdout);
872 #endif
873                   }
874               }
875               if (binmode_f == TRUE)
876 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
877                  if (freopen("","rb",fin) == NULL)
878                     return (-1);
879 #else
880                  setbinmode(fin);
881 #endif
882               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
883               if (nop_f)
884                   noconvert(fin);
885               else {
886                   char *filename = NULL;
887                   kanji_convert(fin);
888                   if (nfiles > 1) filename = origfname;
889                   if (guess_f) print_guessed_code(filename);
890               }
891               fclose(fin);
892 #ifdef OVERWRITE
893               if (overwrite_f) {
894                   struct stat     sb;
895 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
896                   time_t tb[2];
897 #else
898                   struct utimbuf  tb;
899 #endif
900
901                   fflush(stdout);
902                   close(fd);
903                   if (dup2(fd_backup, fileno(stdout)) < 0){
904                       perror("dup2");
905                   }
906                   if (stat(origfname, &sb)) {
907                       fprintf(stderr, "Can't stat %s\n", origfname);
908                   }
909                   /* restore the original file's permissions */
910                   if (chmod(outfname, sb.st_mode)) {
911                       fprintf(stderr, "Can't set permission %s\n", outfname);
912                   }
913
914                   /* restore the original file's timestamps */
915                     if(preserve_time_f){
916 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
917                         tb[0] = tb[1] = sb.st_mtime;
918                         if (utime(outfname, tb)) {
919                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
920                         }
921 #else
922                         tb.actime  = sb.st_atime;
923                         tb.modtime = sb.st_mtime;
924                         if (utime(outfname, &tb)) {
925                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
926                         }
927 #endif
928                     }
929                     if(backup_f){
930                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
931 #ifdef MSDOS
932                         unlink(backup_filename);
933 #endif
934                         if (rename(origfname, backup_filename)) {
935                             perror(backup_filename);
936                             fprintf(stderr, "Can't rename %s to %s\n",
937                                     origfname, backup_filename);
938                         }
939                     }else{
940 #ifdef MSDOS
941                         if (unlink(origfname)){
942                             perror(origfname);
943                         }
944 #endif
945                     }
946                   if (rename(outfname, origfname)) {
947                       perror(origfname);
948                       fprintf(stderr, "Can't rename %s to %s\n",
949                               outfname, origfname);
950                   }
951                   free(outfname);
952               }
953 #endif
954           }
955       }
956         if (is_argument_error)
957             return(-1);
958     }
959 #ifdef EASYWIN /*Easy Win */
960     if (file_out_f == FALSE)
961         scanf("%d",&end_check);
962     else
963         fclose(stdout);
964 #else /* for Other OS */
965     if (file_out_f == TRUE)
966         fclose(stdout);
967 #endif /*Easy Win */
968     return (0);
969 }
970 #endif /* WIN32DLL */
971
972 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename` from the pattern `suffix`.
 *
 * If `suffix` contains one or more '*' characters, every '*' is replaced by
 * a copy of `filename` and all other characters are copied literally
 * (suffix "*.orig", filename "a.txt" -> "a.txt.orig").  If `suffix` has no
 * '*', it is simply appended to `filename` (".bak" -> "a.txt.bak").
 *
 * Returns a newly malloc()ed, NUL-terminated string owned by the caller, or
 * NULL (after reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
        /* plain suffix: the result is filename immediately followed by suffix */
        backup_filename = malloc(filename_length + suffix_length + 1);
        if (!backup_filename) {
            perror("Can't malloc backup filename.");
            return NULL;
        }
        memcpy(backup_filename, filename, filename_length);
        /* the "+ 1" copies the suffix's terminating NUL as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /*
     * Pattern suffix: each '*' disappears and contributes filename_length
     * bytes instead.  Written as (suffix_length - asterisk_count) first so
     * an empty filename cannot make the size computation wrap around.
     */
    backup_filename = malloc((suffix_length - asterisk_count)
                             + asterisk_count * filename_length
                             + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    for (i = 0, j = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
1011 #endif
1012
/*
 * Table of the long ("--name") command line options.
 *
 * options() scans this table from the top and stops at the first entry
 * whose name matches the text after "--"; the comparison runs up to the
 * end of the name or up to a '=' in it.  For names ending in '=', the text
 * following the '=' on the command line is the option's argument.
 *
 * Which entries exist depends on the feature macros guarding them
 * (X0212_ENABLE, UTF8_OUTPUT_ENABLE, UTF8_INPUT_ENABLE, ...).
 */
1013 static const struct {
1014     const char *name;    /* long option text, without the leading "--" */
    /*
     * Non-empty: a string of short option letters that options() parses in
     * place of the long option.  Empty: options() handles the option itself
     * by comparing `name` with strcmp().
     */
1015     const char *alias;
1016 } long_option[] = {
1017     {"ic=", ""},
1018     {"oc=", ""},
1019     {"base64","jMB"},
1020     {"euc","e"},
1021     {"euc-input","E"},
1022     {"fj","jm"},
1023     {"help","v"},
1024     {"jis","j"},
1025     {"jis-input","J"},
1026     {"mac","sLm"},
1027     {"mime","jM"},
1028     {"mime-input","m"},
1029     {"msdos","sLw"},
1030     {"sjis","s"},
1031     {"sjis-input","S"},
1032     {"unix","eLu"},
1033     {"version","V"},
1034     {"windows","sLw"},
1035     {"hiragana","h1"},
1036     {"katakana","h2"},
1037     {"katakana-hiragana","h3"},
1038     {"guess", "g"},
1039     {"cp932", ""},
1040     {"no-cp932", ""},
1041 #ifdef X0212_ENABLE
1042     {"x0212", ""},
1043 #endif
1044 #ifdef UTF8_OUTPUT_ENABLE
1045     {"utf8", "w"},
1046     {"utf16", "w16"},
1047     {"ms-ucs-map", ""},
1048     {"fb-skip", ""},
1049     {"fb-html", ""},
1050     {"fb-xml", ""},
1051     {"fb-perl", ""},
1052     {"fb-java", ""},
1053     {"fb-subchar", ""},
1054     {"fb-subchar=", ""},
1055 #endif
1056 #ifdef UTF8_INPUT_ENABLE
1057     {"utf8-input", "W"},
1058     {"utf16-input", "W16"},
1059     {"no-cp932ext", ""},
1060     {"no-best-fit-chars",""},
1061 #endif
1062 #ifdef UNICODE_NORMALIZATION
1063     {"utf8mac-input", ""},
1064 #endif
1065 #ifdef OVERWRITE
1066     {"overwrite", ""},
1067     {"overwrite=", ""},
1068     {"in-place", ""},
1069     {"in-place=", ""},
1070 #endif
1071 #ifdef INPUT_OPTION
1072     {"cap-input", ""},
1073     {"url-input", ""},
1074 #endif
1075 #ifdef NUMCHAR_OPTION
1076     {"numchar-input", ""},
1077 #endif
1078 #ifdef CHECK_OPTION
1079     {"no-output", ""},
1080     {"debug", ""},
1081 #endif
1082 #ifdef SHIFTJIS_CP932
1083     {"cp932inv", ""},
1084 #endif
1085 #ifdef EXEC_IO
1086     {"exec-in", ""},
1087     {"exec-out", ""},
1088 #endif
1089     {"prefix=", ""},
1090 };
1091
/*
 * Option parsing state shared across calls to options().  options() sets it
 * to 1 when it meets a bare "--" (nothing, or a space, after the two dashes);
 * from then on every call to options() returns immediately without parsing.
 */
1092 static int option_mode = 0;
1093
1094 void options(unsigned char *cp)
1095 {
1096     nkf_char i, j;
1097     unsigned char *p;
1098     unsigned char *cp_back = NULL;
1099     char codeset[32];
1100
1101     if (option_mode==1)
1102         return;
1103     while(*cp && *cp++!='-');
1104     while (*cp || cp_back) {
1105         if(!*cp){
1106             cp = cp_back;
1107             cp_back = NULL;
1108             continue;
1109         }
1110         p = 0;
1111         switch (*cp++) {
1112         case '-':  /* literal options */
1113             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1114                 option_mode = 1;
1115                 return;
1116             }
1117             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1118                 p = (unsigned char *)long_option[i].name;
1119                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1120                 if (*p == cp[j] || cp[j] == SP){
1121                     p = &cp[j] + 1;
1122                     break;
1123                 }
1124                 p = 0;
1125             }
1126             if (p == 0) return;
1127             while(*cp && *cp != SP && cp++);
1128             if (long_option[i].alias[0]){
1129                 cp_back = cp;
1130                 cp = (unsigned char *)long_option[i].alias;
1131             }else{
1132                 if (strcmp(long_option[i].name, "ic=") == 0){
1133                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1134                         codeset[i] = nkf_toupper(p[i]);
1135                     }
1136                     codeset[i] = 0;
1137                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1138                         input_f = JIS_INPUT;
1139                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1140                       strcmp(codeset, "CP50220") == 0 ||
1141                       strcmp(codeset, "CP50221") == 0 ||
1142                       strcmp(codeset, "CP50222") == 0){
1143                         input_f = JIS_INPUT;
1144 #ifdef SHIFTJIS_CP932
1145                         cp51932_f = TRUE;
1146 #endif
1147 #ifdef UTF8_OUTPUT_ENABLE
1148                         ms_ucs_map_f = UCS_MAP_CP932;
1149 #endif
1150                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1151                         input_f = JIS_INPUT;
1152 #ifdef X0212_ENABLE
1153                         x0212_f = TRUE;
1154 #endif
1155                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1156                         input_f = JIS_INPUT;
1157 #ifdef X0212_ENABLE
1158                         x0212_f = TRUE;
1159 #endif
1160                         x0213_f = TRUE;
1161                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1162                         input_f = SJIS_INPUT;
1163                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1164                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1165                              strcmp(codeset, "CP932") == 0 ||
1166                              strcmp(codeset, "MS932") == 0){
1167                         input_f = SJIS_INPUT;
1168 #ifdef SHIFTJIS_CP932
1169                         cp51932_f = TRUE;
1170 #endif
1171 #ifdef UTF8_OUTPUT_ENABLE
1172                         ms_ucs_map_f = UCS_MAP_CP932;
1173 #endif
1174                     }else if(strcmp(codeset, "CP10001") == 0){
1175                         input_f = SJIS_INPUT;
1176 #ifdef SHIFTJIS_CP932
1177                         cp51932_f = TRUE;
1178 #endif
1179 #ifdef UTF8_OUTPUT_ENABLE
1180                         ms_ucs_map_f = UCS_MAP_CP10001;
1181 #endif
1182                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1183                              strcmp(codeset, "EUC-JP") == 0){
1184                         input_f = EUC_INPUT;
1185                     }else if(strcmp(codeset, "CP51932") == 0){
1186                         input_f = EUC_INPUT;
1187 #ifdef SHIFTJIS_CP932
1188                         cp51932_f = TRUE;
1189 #endif
1190 #ifdef UTF8_OUTPUT_ENABLE
1191                         ms_ucs_map_f = UCS_MAP_CP932;
1192 #endif
1193                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1194                              strcmp(codeset, "EUCJP-MS") == 0 ||
1195                              strcmp(codeset, "EUCJPMS") == 0){
1196                         input_f = EUC_INPUT;
1197 #ifdef SHIFTJIS_CP932
1198                         cp51932_f = FALSE;
1199 #endif
1200 #ifdef UTF8_OUTPUT_ENABLE
1201                         ms_ucs_map_f = UCS_MAP_MS;
1202 #endif
1203                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1204                              strcmp(codeset, "EUCJP-ASCII") == 0){
1205                         input_f = EUC_INPUT;
1206 #ifdef SHIFTJIS_CP932
1207                         cp51932_f = FALSE;
1208 #endif
1209 #ifdef UTF8_OUTPUT_ENABLE
1210                         ms_ucs_map_f = UCS_MAP_ASCII;
1211 #endif
1212                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1213                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1214                         input_f = SJIS_INPUT;
1215                         x0213_f = TRUE;
1216 #ifdef SHIFTJIS_CP932
1217                         cp51932_f = FALSE;
1218 #endif
1219                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1220                              strcmp(codeset, "EUC-JIS-2004") == 0){
1221                         input_f = EUC_INPUT;
1222                         x0213_f = TRUE;
1223 #ifdef SHIFTJIS_CP932
1224                         cp51932_f = FALSE;
1225 #endif
1226 #ifdef UTF8_INPUT_ENABLE
1227                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1228                              strcmp(codeset, "UTF-8N") == 0 ||
1229                              strcmp(codeset, "UTF-8-BOM") == 0){
1230                         input_f = UTF8_INPUT;
1231 #ifdef UNICODE_NORMALIZATION
1232                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1233                              strcmp(codeset, "UTF-8-MAC") == 0){
1234                         input_f = UTF8_INPUT;
1235                         nfc_f = TRUE;
1236 #endif
1237                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1238                              strcmp(codeset, "UTF-16BE") == 0 ||
1239                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1240                         input_f = UTF16_INPUT;
1241                         input_endian = ENDIAN_BIG;
1242                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1243                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1244                         input_f = UTF16_INPUT;
1245                         input_endian = ENDIAN_LITTLE;
1246                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1247                              strcmp(codeset, "UTF-32BE") == 0 ||
1248                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1249                         input_f = UTF32_INPUT;
1250                         input_endian = ENDIAN_BIG;
1251                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1252                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1253                         input_f = UTF32_INPUT;
1254                         input_endian = ENDIAN_LITTLE;
1255 #endif
1256                     }
1257                     continue;
1258                 }
1259                 if (strcmp(long_option[i].name, "oc=") == 0){
1260                     x0201_f = FALSE;
1261                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1262                         codeset[i] = nkf_toupper(p[i]);
1263                     }
1264                     codeset[i] = 0;
1265                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1266                         output_conv = j_oconv;
1267                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1268                         output_conv = j_oconv;
1269                         no_cp932ext_f = TRUE;
1270 #ifdef SHIFTJIS_CP932
1271                         cp932inv_f = FALSE;
1272 #endif
1273 #ifdef UTF8_OUTPUT_ENABLE
1274                         ms_ucs_map_f = UCS_MAP_CP932;
1275 #endif
1276                     }else if(strcmp(codeset, "CP50220") == 0){
1277                         output_conv = j_oconv;
1278                         x0201_f = TRUE;
1279 #ifdef SHIFTJIS_CP932
1280                         cp932inv_f = FALSE;
1281 #endif
1282 #ifdef UTF8_OUTPUT_ENABLE
1283                         ms_ucs_map_f = UCS_MAP_CP932;
1284 #endif
1285                     }else if(strcmp(codeset, "CP50221") == 0){
1286                         output_conv = j_oconv;
1287 #ifdef SHIFTJIS_CP932
1288                         cp932inv_f = FALSE;
1289 #endif
1290 #ifdef UTF8_OUTPUT_ENABLE
1291                         ms_ucs_map_f = UCS_MAP_CP932;
1292 #endif
1293                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1294                         output_conv = j_oconv;
1295 #ifdef X0212_ENABLE
1296                         x0212_f = TRUE;
1297 #endif
1298 #ifdef SHIFTJIS_CP932
1299                         cp932inv_f = FALSE;
1300 #endif
1301                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1302                         output_conv = j_oconv;
1303 #ifdef X0212_ENABLE
1304                         x0212_f = TRUE;
1305 #endif
1306                         x0213_f = TRUE;
1307 #ifdef SHIFTJIS_CP932
1308                         cp932inv_f = FALSE;
1309 #endif
1310                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1311                         output_conv = s_oconv;
1312                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1313                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1314                              strcmp(codeset, "CP932") == 0 ||
1315                              strcmp(codeset, "MS932") == 0){
1316                         output_conv = s_oconv;
1317 #ifdef UTF8_OUTPUT_ENABLE
1318                         ms_ucs_map_f = UCS_MAP_CP932;
1319 #endif
1320                     }else if(strcmp(codeset, "CP10001") == 0){
1321                         output_conv = s_oconv;
1322 #ifdef UTF8_OUTPUT_ENABLE
1323                         ms_ucs_map_f = UCS_MAP_CP10001;
1324 #endif
1325                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1326                              strcmp(codeset, "EUC-JP") == 0){
1327                         output_conv = e_oconv;
1328                     }else if(strcmp(codeset, "CP51932") == 0){
1329                         output_conv = e_oconv;
1330 #ifdef SHIFTJIS_CP932
1331                         cp932inv_f = FALSE;
1332 #endif
1333 #ifdef UTF8_OUTPUT_ENABLE
1334                         ms_ucs_map_f = UCS_MAP_CP932;
1335 #endif
1336                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1337                              strcmp(codeset, "EUCJP-MS") == 0 ||
1338                              strcmp(codeset, "EUCJPMS") == 0){
1339                         output_conv = e_oconv;
1340 #ifdef X0212_ENABLE
1341                         x0212_f = TRUE;
1342 #endif
1343 #ifdef UTF8_OUTPUT_ENABLE
1344                         ms_ucs_map_f = UCS_MAP_MS;
1345 #endif
1346                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1347                              strcmp(codeset, "EUCJP-ASCII") == 0){
1348                         output_conv = e_oconv;
1349 #ifdef X0212_ENABLE
1350                         x0212_f = TRUE;
1351 #endif
1352 #ifdef UTF8_OUTPUT_ENABLE
1353                         ms_ucs_map_f = UCS_MAP_ASCII;
1354 #endif
1355                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1356                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1357                         output_conv = s_oconv;
1358                         x0213_f = TRUE;
1359 #ifdef SHIFTJIS_CP932
1360                         cp932inv_f = FALSE;
1361 #endif
1362                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1363                              strcmp(codeset, "EUC-JIS-2004") == 0){
1364                         output_conv = e_oconv;
1365 #ifdef X0212_ENABLE
1366                         x0212_f = TRUE;
1367 #endif
1368                         x0213_f = TRUE;
1369 #ifdef SHIFTJIS_CP932
1370                         cp932inv_f = FALSE;
1371 #endif
1372 #ifdef UTF8_OUTPUT_ENABLE
1373                     }else if(strcmp(codeset, "UTF-8") == 0){
1374                         output_conv = w_oconv;
1375                     }else if(strcmp(codeset, "UTF-8N") == 0){
1376                         output_conv = w_oconv;
1377                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1378                         output_conv = w_oconv;
1379                         output_bom_f = TRUE;
1380                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1381                         output_conv = w_oconv16;
1382                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1383                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1384                         output_conv = w_oconv16;
1385                         output_bom_f = TRUE;
1386                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1387                         output_conv = w_oconv16;
1388                         output_endian = ENDIAN_LITTLE;
1389                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1390                         output_conv = w_oconv16;
1391                         output_endian = ENDIAN_LITTLE;
1392                         output_bom_f = TRUE;
1393                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1394                              strcmp(codeset, "UTF-32BE") == 0){
1395                         output_conv = w_oconv32;
1396                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1397                         output_conv = w_oconv32;
1398                         output_bom_f = TRUE;
1399                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1400                         output_conv = w_oconv32;
1401                         output_endian = ENDIAN_LITTLE;
1402                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1403                         output_conv = w_oconv32;
1404                         output_endian = ENDIAN_LITTLE;
1405                         output_bom_f = TRUE;
1406 #endif
1407                     }
1408                     continue;
1409                 }
1410 #ifdef OVERWRITE
1411                 if (strcmp(long_option[i].name, "overwrite") == 0){
1412                     file_out_f = TRUE;
1413                     overwrite_f = TRUE;
1414                     preserve_time_f = TRUE;
1415                     continue;
1416                 }
1417                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1418                     file_out_f = TRUE;
1419                     overwrite_f = TRUE;
1420                     preserve_time_f = TRUE;
1421                     backup_f = TRUE;
1422                     backup_suffix = malloc(strlen((char *) p) + 1);
1423                     strcpy(backup_suffix, (char *) p);
1424                     continue;
1425                 }
1426                 if (strcmp(long_option[i].name, "in-place") == 0){
1427                     file_out_f = TRUE;
1428                     overwrite_f = TRUE;
1429                     preserve_time_f = FALSE;
1430                     continue;
1431                 }
1432                 if (strcmp(long_option[i].name, "in-place=") == 0){
1433                     file_out_f = TRUE;
1434                     overwrite_f = TRUE;
1435                     preserve_time_f = FALSE;
1436                     backup_f = TRUE;
1437                     backup_suffix = malloc(strlen((char *) p) + 1);
1438                     strcpy(backup_suffix, (char *) p);
1439                     continue;
1440                 }
1441 #endif
1442 #ifdef INPUT_OPTION
1443                 if (strcmp(long_option[i].name, "cap-input") == 0){
1444                     cap_f = TRUE;
1445                     continue;
1446                 }
1447                 if (strcmp(long_option[i].name, "url-input") == 0){
1448                     url_f = TRUE;
1449                     continue;
1450                 }
1451 #endif
1452 #ifdef NUMCHAR_OPTION
1453                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1454                     numchar_f = TRUE;
1455                     continue;
1456                 }
1457 #endif
1458 #ifdef CHECK_OPTION
1459                 if (strcmp(long_option[i].name, "no-output") == 0){
1460                     noout_f = TRUE;
1461                     continue;
1462                 }
1463                 if (strcmp(long_option[i].name, "debug") == 0){
1464                     debug_f = TRUE;
1465                     continue;
1466                 }
1467 #endif
1468                 if (strcmp(long_option[i].name, "cp932") == 0){
1469 #ifdef SHIFTJIS_CP932
1470                     cp51932_f = TRUE;
1471                     cp932inv_f = TRUE;
1472 #endif
1473 #ifdef UTF8_OUTPUT_ENABLE
1474                     ms_ucs_map_f = UCS_MAP_CP932;
1475 #endif
1476                     continue;
1477                 }
1478                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1479 #ifdef SHIFTJIS_CP932
1480                     cp51932_f = FALSE;
1481                     cp932inv_f = FALSE;
1482 #endif
1483 #ifdef UTF8_OUTPUT_ENABLE
1484                     ms_ucs_map_f = UCS_MAP_ASCII;
1485 #endif
1486                     continue;
1487                 }
1488 #ifdef SHIFTJIS_CP932
1489                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1490                     cp932inv_f = TRUE;
1491                     continue;
1492                 }
1493 #endif
1494
1495 #ifdef X0212_ENABLE
1496                 if (strcmp(long_option[i].name, "x0212") == 0){
1497                     x0212_f = TRUE;
1498                     continue;
1499                 }
1500 #endif
1501
1502 #ifdef EXEC_IO
1503                   if (strcmp(long_option[i].name, "exec-in") == 0){
1504                       exec_f = 1;
1505                       return;
1506                   }
1507                   if (strcmp(long_option[i].name, "exec-out") == 0){
1508                       exec_f = -1;
1509                       return;
1510                   }
1511 #endif
1512 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1513                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1514                     no_cp932ext_f = TRUE;
1515                     continue;
1516                 }
1517                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1518                     no_best_fit_chars_f = TRUE;
1519                     continue;
1520                 }
1521                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1522                     encode_fallback = NULL;
1523                     continue;
1524                 }
1525                 if (strcmp(long_option[i].name, "fb-html") == 0){
1526                     encode_fallback = encode_fallback_html;
1527                     continue;
1528                 }
1529                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1530                     encode_fallback = encode_fallback_xml;
1531                     continue;
1532                 }
1533                 if (strcmp(long_option[i].name, "fb-java") == 0){
1534                     encode_fallback = encode_fallback_java;
1535                     continue;
1536                 }
1537                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1538                     encode_fallback = encode_fallback_perl;
1539                     continue;
1540                 }
1541                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1542                     encode_fallback = encode_fallback_subchar;
1543                     continue;
1544                 }
1545                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1546                     encode_fallback = encode_fallback_subchar;
1547                     unicode_subchar = 0;
1548                     if (p[0] != '0'){
1549                         /* decimal number */
1550                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1551                             unicode_subchar *= 10;
1552                             unicode_subchar += hex2bin(p[i]);
1553                         }
1554                     }else if(p[1] == 'x' || p[1] == 'X'){
1555                         /* hexadecimal number */
1556                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1557                             unicode_subchar <<= 4;
1558                             unicode_subchar |= hex2bin(p[i]);
1559                         }
1560                     }else{
1561                         /* octal number */
1562                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1563                             unicode_subchar *= 8;
1564                             unicode_subchar += hex2bin(p[i]);
1565                         }
1566                     }
1567                     w16e_conv(unicode_subchar, &i, &j);
1568                     unicode_subchar = i<<8 | j;
1569                     continue;
1570                 }
1571 #endif
1572 #ifdef UTF8_OUTPUT_ENABLE
1573                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1574                     ms_ucs_map_f = UCS_MAP_MS;
1575                     continue;
1576                 }
1577 #endif
1578 #ifdef UNICODE_NORMALIZATION
1579                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1580                     input_f = UTF8_INPUT;
1581                     nfc_f = TRUE;
1582                     continue;
1583                 }
1584 #endif
1585                 if (strcmp(long_option[i].name, "prefix=") == 0){
1586                     if (nkf_isgraph(p[0])){
1587                         for (i = 1; nkf_isgraph(p[i]); i++){
1588                             prefix_table[p[i]] = p[0];
1589                         }
1590                     }
1591                     continue;
1592                 }
1593             }
1594             continue;
1595         case 'b':           /* buffered mode */
1596             unbuf_f = FALSE;
1597             continue;
1598         case 'u':           /* non bufferd mode */
1599             unbuf_f = TRUE;
1600             continue;
1601         case 't':           /* transparent mode */
1602             if (*cp=='1') {
1603                 /* alias of -t */
1604                 nop_f = TRUE;
1605                 *cp++;
1606             } else if (*cp=='2') {
1607                 /*
1608                  * -t with put/get
1609                  *
1610                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1611                  *
1612                  */
1613                 nop_f = 2;
1614                 *cp++;
1615             } else
1616                 nop_f = TRUE;
1617             continue;
1618         case 'j':           /* JIS output */
1619         case 'n':
1620             output_conv = j_oconv;
1621             continue;
1622         case 'e':           /* AT&T EUC output */
1623             output_conv = e_oconv;
1624             cp932inv_f = FALSE;
1625             continue;
1626         case 's':           /* SJIS output */
1627             output_conv = s_oconv;
1628             continue;
1629         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1630             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1631             input_f = LATIN1_INPUT;
1632             continue;
1633         case 'i':           /* Kanji IN ESC-$-@/B */
1634             if (*cp=='@'||*cp=='B')
1635                 kanji_intro = *cp++;
1636             continue;
1637         case 'o':           /* ASCII IN ESC-(-J/B */
1638             if (*cp=='J'||*cp=='B'||*cp=='H')
1639                 ascii_intro = *cp++;
1640             continue;
1641         case 'h':
1642             /*
1643                 bit:1   katakana->hiragana
1644                 bit:2   hiragana->katakana
1645             */
1646             if ('9'>= *cp && *cp>='0')
1647                 hira_f |= (*cp++ -'0');
1648             else
1649                 hira_f |= 1;
1650             continue;
1651         case 'r':
1652             rot_f = TRUE;
1653             continue;
1654 #if defined(MSDOS) || defined(__OS2__)
1655         case 'T':
1656             binmode_f = FALSE;
1657             continue;
1658 #endif
1659 #ifndef PERL_XS
1660         case 'V':
1661             version();
1662             exit(1);
1663             break;
1664         case 'v':
1665             usage();
1666             exit(1);
1667             break;
1668 #endif
1669 #ifdef UTF8_OUTPUT_ENABLE
1670         case 'w':           /* UTF-8 output */
1671             if (cp[0] == '8') {
1672                 output_conv = w_oconv; cp++;
1673                 if (cp[0] == '0'){
1674                     cp++;
1675                 } else {
1676                     output_bom_f = TRUE;
1677                 }
1678             } else {
1679                 if ('1'== cp[0] && '6'==cp[1]) {
1680                     output_conv = w_oconv16; cp+=2;
1681                 } else if ('3'== cp[0] && '2'==cp[1]) {
1682                     output_conv = w_oconv32; cp+=2;
1683                 } else {
1684                     output_conv = w_oconv;
1685                     continue;
1686                 }
1687                 if (cp[0]=='L') {
1688                     cp++;
1689                     output_endian = ENDIAN_LITTLE;
1690                 } else if (cp[0] == 'B') {
1691                     cp++;
1692                 } else {
1693                     continue;
1694                 }
1695                 if (cp[0] == '0'){
1696                     cp++;
1697                 } else {
1698                     output_bom_f = TRUE;
1699                 }
1700             }
1701             continue;
1702 #endif
1703 #ifdef UTF8_INPUT_ENABLE
1704         case 'W':           /* UTF input */
1705             if (cp[0] == '8') {
1706                 cp++;
1707                 input_f = UTF8_INPUT;
1708             }else{
1709                 if ('1'== cp[0] && '6'==cp[1]) {
1710                     cp += 2;
1711                     input_f = UTF16_INPUT;
1712                     input_endian = ENDIAN_BIG;
1713                 } else if ('3'== cp[0] && '2'==cp[1]) {
1714                     cp += 2;
1715                     input_f = UTF32_INPUT;
1716                     input_endian = ENDIAN_BIG;
1717                 } else {
1718                     input_f = UTF8_INPUT;
1719                     continue;
1720                 }
1721                 if (cp[0]=='L') {
1722                     cp++;
1723                     input_endian = ENDIAN_LITTLE;
1724                 } else if (cp[0] == 'B') {
1725                     cp++;
1726                 }
1727             }
1728             continue;
1729 #endif
1730         /* Input code assumption */
1731         case 'J':   /* JIS input */
1732             input_f = JIS_INPUT;
1733             continue;
1734         case 'E':   /* AT&T EUC input */
1735             input_f = EUC_INPUT;
1736             continue;
1737         case 'S':   /* MS Kanji input */
1738             input_f = SJIS_INPUT;
1739             if (x0201_f==NO_X0201) x0201_f=TRUE;
1740             continue;
1741         case 'Z':   /* Convert X0208 alphabet to asii */
1742             /* alpha_f
1743                bit:0   Convert JIS X 0208 Alphabet to ASCII
1744                bit:1   Convert Kankaku to one space
1745                bit:2   Convert Kankaku to two spaces
1746                bit:3   Convert HTML Entity
1747                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1748             */
1749             while ('0'<= *cp && *cp <='9') {
1750                 alpha_f |= 1 << (*cp++ - '0');
1751             }
1752             if (!alpha_f) alpha_f = 1;
1753             continue;
1754         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1755             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1756             /* accept  X0201
1757                     ESC-(-I     in JIS, EUC, MS Kanji
1758                     SI/SO       in JIS, EUC, MS Kanji
1759                     SSO         in EUC, JIS, not in MS Kanji
1760                     MS Kanji (0xa0-0xdf)
1761                output  X0201
1762                     ESC-(-I     in JIS (0x20-0x5f)
1763                     SSO         in EUC (0xa0-0xdf)
1764                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1765             */
1766             continue;
1767         case 'X':   /* Assume X0201 kana */
1768             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1769             x0201_f = TRUE;
1770             continue;
1771         case 'F':   /* prserve new lines */
1772             fold_preserve_f = TRUE;
1773         case 'f':   /* folding -f60 or -f */
1774             fold_f = TRUE;
1775             fold_len = 0;
1776             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1777                 fold_len *= 10;
1778                 fold_len += *cp++ - '0';
1779             }
1780             if (!(0<fold_len && fold_len<BUFSIZ))
1781                 fold_len = DEFAULT_FOLD;
1782             if (*cp=='-') {
1783                 fold_margin = 0;
1784                 cp++;
1785                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1786                     fold_margin *= 10;
1787                     fold_margin += *cp++ - '0';
1788                 }
1789             }
1790             continue;
1791         case 'm':   /* MIME support */
1792             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1793             if (*cp=='B'||*cp=='Q') {
1794                 mime_decode_mode = *cp++;
1795                 mimebuf_f = FIXED_MIME;
1796             } else if (*cp=='N') {
1797                 mime_f = TRUE; cp++;
1798             } else if (*cp=='S') {
1799                 mime_f = STRICT_MIME; cp++;
1800             } else if (*cp=='0') {
1801                 mime_decode_f = FALSE;
1802                 mime_f = FALSE; cp++;
1803             }
1804             continue;
1805         case 'M':   /* MIME output */
1806             if (*cp=='B') {
1807                 mimeout_mode = 'B';
1808                 mimeout_f = FIXED_MIME; cp++;
1809             } else if (*cp=='Q') {
1810                 mimeout_mode = 'Q';
1811                 mimeout_f = FIXED_MIME; cp++;
1812             } else {
1813                 mimeout_f = TRUE;
1814             }
1815             continue;
1816         case 'B':   /* Broken JIS support */
1817             /*  bit:0   no ESC JIS
1818                 bit:1   allow any x on ESC-(-x or ESC-$-x
1819                 bit:2   reset to ascii on NL
1820             */
1821             if ('9'>= *cp && *cp>='0')
1822                 broken_f |= 1<<(*cp++ -'0');
1823             else
1824                 broken_f |= TRUE;
1825             continue;
1826 #ifndef PERL_XS
1827         case 'O':/* for Output file */
1828             file_out_f = TRUE;
1829             continue;
1830 #endif
1831         case 'c':/* add cr code */
1832             nlmode_f = CRLF;
1833             continue;
1834         case 'd':/* delete cr code */
1835             nlmode_f = LF;
1836             continue;
1837         case 'I':   /* ISO-2022-JP output */
1838             iso2022jp_f = TRUE;
1839             continue;
1840         case 'L':  /* line mode */
1841             if (*cp=='u') {         /* unix */
1842                 nlmode_f = LF; cp++;
1843             } else if (*cp=='m') { /* mac */
1844                 nlmode_f = CR; cp++;
1845             } else if (*cp=='w') { /* windows */
1846                 nlmode_f = CRLF; cp++;
1847             } else if (*cp=='0') { /* no conversion  */
1848                 nlmode_f = 0; cp++;
1849             }
1850             continue;
1851         case 'g':
1852 #ifndef PERL_XS
1853             guess_f = TRUE;
1854 #endif
1855             continue;
1856         case SP:
1857         /* module muliple options in a string are allowed for Perl moudle  */
1858             while(*cp && *cp++!='-');
1859             continue;
1860         default:
1861             /* bogus option but ignored */
1862             continue;
1863         }
1864     }
1865 }
1866
1867 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1868 {
1869     if (iconv_func){
1870         struct input_code *p = input_code_list;
1871         while (p->name){
1872             if (iconv_func == p->iconv_func){
1873                 return p;
1874             }
1875             p++;
1876         }
1877     }
1878     return 0;
1879 }
1880
1881 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1882 {
1883 #ifdef INPUT_CODE_FIX
1884     if (f || !input_f)
1885 #endif
1886         if (estab_f != f){
1887             estab_f = f;
1888         }
1889
1890     if (iconv_func
1891 #ifdef INPUT_CODE_FIX
1892         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1893 #endif
1894         ){
1895         iconv = iconv_func;
1896     }
1897 #ifdef CHECK_OPTION
1898     if (estab_f && iconv_for_check != iconv){
1899         struct input_code *p = find_inputcode_byfunc(iconv);
1900         if (p){
1901             set_input_codename(p->name);
1902             debug(p->name);
1903         }
1904         iconv_for_check = iconv;
1905     }
1906 #endif
1907 }
1908
/*
 * Penalty bits OR-ed into input_code.score by set_code_score().
 * Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* reinterpreted through CP932 */
#define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* character that does not exist */
#else
#define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* character that does not exist */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1) /* error */

/* Score a detector starts from; status_reset() assigns it. */
#define SCORE_INIT (SCORE_iMIME)
1922
1923 static const char score_table_A0[] = {
1924     0, 0, 0, 0,
1925     0, 0, 0, 0,
1926     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1927     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1928 };
1929
1930 static const char score_table_F0[] = {
1931     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1932     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1933     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1934     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1935 };
1936
1937 void set_code_score(struct input_code *ptr, nkf_char score)
1938 {
1939     if (ptr){
1940         ptr->score |= score;
1941     }
1942 }
1943
1944 void clr_code_score(struct input_code *ptr, nkf_char score)
1945 {
1946     if (ptr){
1947         ptr->score &= ~score;
1948     }
1949 }
1950
1951 void code_score(struct input_code *ptr)
1952 {
1953     nkf_char c2 = ptr->buf[0];
1954 #ifdef UTF8_OUTPUT_ENABLE
1955     nkf_char c1 = ptr->buf[1];
1956 #endif
1957     if (c2 < 0){
1958         set_code_score(ptr, SCORE_ERROR);
1959     }else if (c2 == SSO){
1960         set_code_score(ptr, SCORE_KANA);
1961 #ifdef UTF8_OUTPUT_ENABLE
1962     }else if (!e2w_conv(c2, c1)){
1963         set_code_score(ptr, SCORE_NO_EXIST);
1964 #endif
1965     }else if ((c2 & 0x70) == 0x20){
1966         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1967     }else if ((c2 & 0x70) == 0x70){
1968         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1969     }else if ((c2 & 0x70) >= 0x50){
1970         set_code_score(ptr, SCORE_L2);
1971     }
1972 }
1973
1974 void status_disable(struct input_code *ptr)
1975 {
1976     ptr->stat = -1;
1977     ptr->buf[0] = -1;
1978     code_score(ptr);
1979     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1980 }
1981
1982 void status_push_ch(struct input_code *ptr, nkf_char c)
1983 {
1984     ptr->buf[ptr->index++] = c;
1985 }
1986
1987 void status_clear(struct input_code *ptr)
1988 {
1989     ptr->stat = 0;
1990     ptr->index = 0;
1991 }
1992
1993 void status_reset(struct input_code *ptr)
1994 {
1995     status_clear(ptr);
1996     ptr->score = SCORE_INIT;
1997 }
1998
1999 void status_reinit(struct input_code *ptr)
2000 {
2001     status_reset(ptr);
2002     ptr->_file_stat = 0;
2003 }
2004
2005 void status_check(struct input_code *ptr, nkf_char c)
2006 {
2007     if (c <= DEL && estab_f){
2008         status_reset(ptr);
2009     }
2010 }
2011
2012 void s_status(struct input_code *ptr, nkf_char c)
2013 {
2014     switch(ptr->stat){
2015       case -1:
2016           status_check(ptr, c);
2017           break;
2018       case 0:
2019           if (c <= DEL){
2020               break;
2021 #ifdef NUMCHAR_OPTION
2022           }else if (is_unicode_capsule(c)){
2023               break;
2024 #endif
2025           }else if (0xa1 <= c && c <= 0xdf){
2026               status_push_ch(ptr, SSO);
2027               status_push_ch(ptr, c);
2028               code_score(ptr);
2029               status_clear(ptr);
2030           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2031               ptr->stat = 1;
2032               status_push_ch(ptr, c);
2033 #ifdef SHIFTJIS_CP932
2034           }else if (cp51932_f
2035                     && is_ibmext_in_sjis(c)){
2036               ptr->stat = 2;
2037               status_push_ch(ptr, c);
2038 #endif /* SHIFTJIS_CP932 */
2039 #ifdef X0212_ENABLE
2040           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2041               ptr->stat = 1;
2042               status_push_ch(ptr, c);
2043 #endif /* X0212_ENABLE */
2044           }else{
2045               status_disable(ptr);
2046           }
2047           break;
2048       case 1:
2049           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2050               status_push_ch(ptr, c);
2051               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2052               code_score(ptr);
2053               status_clear(ptr);
2054           }else{
2055               status_disable(ptr);
2056           }
2057           break;
2058       case 2:
2059 #ifdef SHIFTJIS_CP932
2060           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2061               status_push_ch(ptr, c);
2062               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2063                   set_code_score(ptr, SCORE_CP932);
2064                   status_clear(ptr);
2065                   break;
2066               }
2067           }
2068 #endif /* SHIFTJIS_CP932 */
2069 #ifndef X0212_ENABLE
2070           status_disable(ptr);
2071 #endif
2072           break;
2073     }
2074 }
2075
2076 void e_status(struct input_code *ptr, nkf_char c)
2077 {
2078     switch (ptr->stat){
2079       case -1:
2080           status_check(ptr, c);
2081           break;
2082       case 0:
2083           if (c <= DEL){
2084               break;
2085 #ifdef NUMCHAR_OPTION
2086           }else if (is_unicode_capsule(c)){
2087               break;
2088 #endif
2089           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2090               ptr->stat = 1;
2091               status_push_ch(ptr, c);
2092 #ifdef X0212_ENABLE
2093           }else if (0x8f == c){
2094               ptr->stat = 2;
2095               status_push_ch(ptr, c);
2096 #endif /* X0212_ENABLE */
2097           }else{
2098               status_disable(ptr);
2099           }
2100           break;
2101       case 1:
2102           if (0xa1 <= c && c <= 0xfe){
2103               status_push_ch(ptr, c);
2104               code_score(ptr);
2105               status_clear(ptr);
2106           }else{
2107               status_disable(ptr);
2108           }
2109           break;
2110 #ifdef X0212_ENABLE
2111       case 2:
2112           if (0xa1 <= c && c <= 0xfe){
2113               ptr->stat = 1;
2114               status_push_ch(ptr, c);
2115           }else{
2116               status_disable(ptr);
2117           }
2118 #endif /* X0212_ENABLE */
2119     }
2120 }
2121
#ifdef UTF8_INPUT_ENABLE
/*
 * Feed one input byte to the UTF-8 detector state machine.
 *
 * In state 0 the lead byte selects the number of continuation bytes still
 * expected, which is stored in ptr->stat:
 *   0xc0..0xdf -> 1,  0xe0..0xef -> 2,  0xf0..0xf4 -> 3.
 * States 1 and 2 collect continuation bytes (0x80..0xbf); once the sequence
 * is complete it is converted with w2e_conv() and scored, except that the
 * sequence EF BB BF is not scored.  State 3 only counts continuation
 * bytes and then goes idle without scoring.
 *
 * A byte that does not fit the current state disables the detector.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    int is_bom;

    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL){
              break;
          }
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c)){
              break;
          }
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index <= ptr->stat){
              /* More continuation bytes are still expected. */
              break;
          }
          is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                    && ptr->buf[2] == 0xbf);
          w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                   &ptr->buf[0], &ptr->buf[1]);
          if (!is_bom){
              code_score(ptr);
          }
          status_clear(ptr);
          break;

      case 3:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
          }else if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              status_clear(ptr);
          }
          break;
    }
}
#endif
2181
2182 void code_status(nkf_char c)
2183 {
2184     int action_flag = 1;
2185     struct input_code *result = 0;
2186     struct input_code *p = input_code_list;
2187     while (p->name){
2188         if (!p->status_func) {
2189             ++p;
2190             continue;
2191         }
2192         if (!p->status_func)
2193             continue;
2194         (p->status_func)(p, c);
2195         if (p->stat > 0){
2196             action_flag = 0;
2197         }else if(p->stat == 0){
2198             if (result){
2199                 action_flag = 0;
2200             }else{
2201                 result = p;
2202             }
2203         }
2204         ++p;
2205     }
2206
2207     if (action_flag){
2208         if (result && !estab_f){
2209             set_iconv(TRUE, result->iconv_func);
2210         }else if (c <= DEL){
2211             struct input_code *ptr = input_code_list;
2212             while (ptr->name){
2213                 status_reset(ptr);
2214                 ++ptr;
2215             }
2216         }
2217     }
2218 }
2219
2220 #ifndef WIN32DLL
2221 nkf_char std_getc(FILE *f)
2222 {
2223     if (std_gc_ndx){
2224         return std_gc_buf[--std_gc_ndx];
2225     }
2226     return getc(f);
2227 }
2228 #endif /*WIN32DLL*/
2229
2230 nkf_char std_ungetc(nkf_char c, FILE *f)
2231 {
2232     if (std_gc_ndx == STD_GC_BUFSIZE){
2233         return EOF;
2234     }
2235     std_gc_buf[std_gc_ndx++] = c;
2236     return c;
2237 }
2238
2239 #ifndef WIN32DLL
2240 void std_putc(nkf_char c)
2241 {
2242     if(c!=EOF)
2243       putchar(c);
2244 }
2245 #endif /*WIN32DLL*/
2246
2247 #if !defined(PERL_XS) && !defined(WIN32DLL)
2248 nkf_char noconvert(FILE *f)
2249 {
2250     nkf_char    c;
2251
2252     if (nop_f == 2)
2253         module_connection();
2254     while ((c = (*i_getc)(f)) != EOF)
2255       (*o_putc)(c);
2256     (*o_putc)(EOF);
2257     return 1;
2258 }
2259 #endif
2260
2261 void module_connection(void)
2262 {
2263     oconv = output_conv;
2264     o_putc = std_putc;
2265
2266     /* replace continucation module, from output side */
2267
2268     /* output redicrection */
2269 #ifdef CHECK_OPTION
2270     if (noout_f || guess_f){
2271         o_putc = no_putc;
2272     }
2273 #endif
2274     if (mimeout_f) {
2275         o_mputc = o_putc;
2276         o_putc = mime_putc;
2277         if (mimeout_f == TRUE) {
2278             o_base64conv = oconv; oconv = base64_conv;
2279         }
2280         /* base64_count = 0; */
2281     }
2282
2283     if (nlmode_f || guess_f) {
2284         o_nlconv = oconv; oconv = nl_conv;
2285     }
2286     if (rot_f) {
2287         o_rot_conv = oconv; oconv = rot_conv;
2288     }
2289     if (iso2022jp_f) {
2290         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2291     }
2292     if (hira_f) {
2293         o_hira_conv = oconv; oconv = hira_conv;
2294     }
2295     if (fold_f) {
2296         o_fconv = oconv; oconv = fold_conv;
2297         f_line = 0;
2298     }
2299     if (alpha_f || x0201_f) {
2300         o_zconv = oconv; oconv = z_conv;
2301     }
2302
2303     i_getc = std_getc;
2304     i_ungetc = std_ungetc;
2305     /* input redicrection */
2306 #ifdef INPUT_OPTION
2307     if (cap_f){
2308         i_cgetc = i_getc; i_getc = cap_getc;
2309         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2310     }
2311     if (url_f){
2312         i_ugetc = i_getc; i_getc = url_getc;
2313         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2314     }
2315 #endif
2316 #ifdef NUMCHAR_OPTION
2317     if (numchar_f){
2318         i_ngetc = i_getc; i_getc = numchar_getc;
2319         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2320     }
2321 #endif
2322 #ifdef UNICODE_NORMALIZATION
2323     if (nfc_f && input_f == UTF8_INPUT){
2324         i_nfc_getc = i_getc; i_getc = nfc_getc;
2325         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2326     }
2327 #endif
2328     if (mime_f && mimebuf_f==FIXED_MIME) {
2329         i_mgetc = i_getc; i_getc = mime_getc;
2330         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2331     }
2332     if (broken_f & 1) {
2333         i_bgetc = i_getc; i_getc = broken_getc;
2334         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2335     }
2336     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2337         set_iconv(-TRUE, e_iconv);
2338     } else if (input_f == SJIS_INPUT) {
2339         set_iconv(-TRUE, s_iconv);
2340 #ifdef UTF8_INPUT_ENABLE
2341     } else if (input_f == UTF8_INPUT) {
2342         set_iconv(-TRUE, w_iconv);
2343     } else if (input_f == UTF16_INPUT) {
2344         set_iconv(-TRUE, w_iconv16);
2345     } else if (input_f == UTF32_INPUT) {
2346         set_iconv(-TRUE, w_iconv32);
2347 #endif
2348     } else {
2349         set_iconv(FALSE, e_iconv);
2350     }
2351
2352     {
2353         struct input_code *p = input_code_list;
2354         while (p->name){
2355             status_reinit(p++);
2356         }
2357     }
2358 }
2359
2360 /*
2361  * Check and Ignore BOM
2362  */
2363 void check_bom(FILE *f)
2364 {
2365     int c2;
2366     switch(c2 = (*i_getc)(f)){
2367     case 0x00:
2368         if((c2 = (*i_getc)(f)) == 0x00){
2369             if((c2 = (*i_getc)(f)) == 0xFE){
2370                 if((c2 = (*i_getc)(f)) == 0xFF){
2371                     if(!input_f){
2372                         set_iconv(TRUE, w_iconv32);
2373                     }
2374                     if (iconv == w_iconv32) {
2375                         input_endian = ENDIAN_BIG;
2376                         return;
2377                     }
2378                     (*i_ungetc)(0xFF,f);
2379                 }else (*i_ungetc)(c2,f);
2380                 (*i_ungetc)(0xFE,f);
2381             }else if(c2 == 0xFF){
2382                 if((c2 = (*i_getc)(f)) == 0xFE){
2383                     if(!input_f){
2384                         set_iconv(TRUE, w_iconv32);
2385                     }
2386                     if (iconv == w_iconv32) {
2387                         input_endian = ENDIAN_2143;
2388                         return;
2389                     }
2390                     (*i_ungetc)(0xFF,f);
2391                 }else (*i_ungetc)(c2,f);
2392                 (*i_ungetc)(0xFF,f);
2393             }else (*i_ungetc)(c2,f);
2394             (*i_ungetc)(0x00,f);
2395         }else (*i_ungetc)(c2,f);
2396         (*i_ungetc)(0x00,f);
2397         break;
2398     case 0xEF:
2399         if((c2 = (*i_getc)(f)) == 0xBB){
2400             if((c2 = (*i_getc)(f)) == 0xBF){
2401                 if(!input_f){
2402                     set_iconv(TRUE, w_iconv);
2403                 }
2404                 if (iconv == w_iconv) {
2405                     return;
2406                 }
2407                 (*i_ungetc)(0xBF,f);
2408             }else (*i_ungetc)(c2,f);
2409             (*i_ungetc)(0xBB,f);
2410         }else (*i_ungetc)(c2,f);
2411         (*i_ungetc)(0xEF,f);
2412         break;
2413     case 0xFE:
2414         if((c2 = (*i_getc)(f)) == 0xFF){
2415             if((c2 = (*i_getc)(f)) == 0x00){
2416                 if((c2 = (*i_getc)(f)) == 0x00){
2417                     if(!input_f){
2418                         set_iconv(TRUE, w_iconv32);
2419                     }
2420                     if (iconv == w_iconv32) {
2421                         input_endian = ENDIAN_3412;
2422                         return;
2423                     }
2424                     (*i_ungetc)(0x00,f);
2425                 }else (*i_ungetc)(c2,f);
2426                 (*i_ungetc)(0x00,f);
2427             }else (*i_ungetc)(c2,f);
2428             if(!input_f){
2429                 set_iconv(TRUE, w_iconv16);
2430             }
2431             if (iconv == w_iconv16) {
2432                 input_endian = ENDIAN_BIG;
2433                 return;
2434             }
2435             (*i_ungetc)(0xFF,f);
2436         }else (*i_ungetc)(c2,f);
2437         (*i_ungetc)(0xFE,f);
2438         break;
2439     case 0xFF:
2440         if((c2 = (*i_getc)(f)) == 0xFE){
2441             if((c2 = (*i_getc)(f)) == 0x00){
2442                 if((c2 = (*i_getc)(f)) == 0x00){
2443                     if(!input_f){
2444                         set_iconv(TRUE, w_iconv32);
2445                     }
2446                     if (iconv == w_iconv32) {
2447                         input_endian = ENDIAN_LITTLE;
2448                         return;
2449                     }
2450                     (*i_ungetc)(0x00,f);
2451                 }else (*i_ungetc)(c2,f);
2452                 (*i_ungetc)(0x00,f);
2453             }else (*i_ungetc)(c2,f);
2454             if(!input_f){
2455                 set_iconv(TRUE, w_iconv16);
2456             }
2457             if (iconv == w_iconv16) {
2458                 input_endian = ENDIAN_LITTLE;
2459                 return;
2460             }
2461             (*i_ungetc)(0xFE,f);
2462         }else (*i_ungetc)(c2,f);
2463         (*i_ungetc)(0xFF,f);
2464         break;
2465     default:
2466         (*i_ungetc)(c2,f);
2467         break;
2468     }
2469 }
2470
2471 /*
2472    Conversion main loop. Code detection only.
2473  */
2474
2475 nkf_char kanji_convert(FILE *f)
2476 {
2477     nkf_char    c3, c2=0, c1, c0=0;
2478     int is_8bit = FALSE;
2479
2480     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2481 #ifdef UTF8_INPUT_ENABLE
2482        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2483 #endif
2484       ){
2485         is_8bit = TRUE;
2486     }
2487
2488     input_mode = ASCII;
2489     output_mode = ASCII;
2490     shift_mode = FALSE;
2491
2492 #define NEXT continue      /* no output, get next */
2493 #define SEND ;             /* output c1 and c2, get next */
2494 #define LAST break         /* end of loop, go closing  */
2495
2496     module_connection();
2497     check_bom(f);
2498
2499     while ((c1 = (*i_getc)(f)) != EOF) {
2500 #ifdef INPUT_CODE_FIX
2501         if (!input_f)
2502 #endif
2503             code_status(c1);
2504         if (c2) {
2505             /* second byte */
2506             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2507                 /* in case of 8th bit is on */
2508                 if (!estab_f&&!mime_decode_mode) {
2509                     /* in case of not established yet */
2510                     /* It is still ambiguious */
2511                     if (h_conv(f, c2, c1)==EOF)
2512                         LAST;
2513                     else
2514                         c2 = 0;
2515                     NEXT;
2516                 } else {
2517                     /* in case of already established */
2518                     if (c1 < AT) {
2519                         /* ignore bogus code and not CP5022x UCD */
2520                         c2 = 0;
2521                         NEXT;
2522                     } else {
2523                         SEND;
2524                     }
2525                 }
2526             } else
2527                 /* second byte, 7 bit code */
2528                 /* it might be kanji shitfted */
2529                 if ((c1 == DEL) || (c1 <= SP)) {
2530                     /* ignore bogus first code */
2531                     c2 = 0;
2532                     NEXT;
2533                 } else
2534                     SEND;
2535         } else {
2536             /* first byte */
2537 #ifdef UTF8_INPUT_ENABLE
2538             if (iconv == w_iconv16) {
2539                 if (input_endian == ENDIAN_BIG) {
2540                     c2 = c1;
2541                     if ((c1 = (*i_getc)(f)) != EOF) {
2542                         if (0xD8 <= c2 && c2 <= 0xDB) {
2543                             if ((c0 = (*i_getc)(f)) != EOF) {
2544                                 c0 <<= 8;
2545                                 if ((c3 = (*i_getc)(f)) != EOF) {
2546                                     c0 |= c3;
2547                                 } else c2 = EOF;
2548                             } else c2 = EOF;
2549                         }
2550                     } else c2 = EOF;
2551                 } else {
2552                     if ((c2 = (*i_getc)(f)) != EOF) {
2553                         if (0xD8 <= c2 && c2 <= 0xDB) {
2554                             if ((c3 = (*i_getc)(f)) != EOF) {
2555                                 if ((c0 = (*i_getc)(f)) != EOF) {
2556                                     c0 <<= 8;
2557                                     c0 |= c3;
2558                                 } else c2 = EOF;
2559                             } else c2 = EOF;
2560                         }
2561                     } else c2 = EOF;
2562                 }
2563                 SEND;
2564             } else if(iconv == w_iconv32){
2565                 int c3 = c1;
2566                 if((c2 = (*i_getc)(f)) != EOF &&
2567                    (c1 = (*i_getc)(f)) != EOF &&
2568                    (c0 = (*i_getc)(f)) != EOF){
2569                     switch(input_endian){
2570                     case ENDIAN_BIG:
2571                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2572                         break;
2573                     case ENDIAN_LITTLE:
2574                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2575                         break;
2576                     case ENDIAN_2143:
2577                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2578                         break;
2579                     case ENDIAN_3412:
2580                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2581                         break;
2582                     }
2583                     c2 = 0;
2584                 }else{
2585                     c2 = EOF;
2586                 }
2587                 SEND;
2588             } else
2589 #endif
2590 #ifdef NUMCHAR_OPTION
2591             if (is_unicode_capsule(c1)){
2592                 SEND;
2593             } else
2594 #endif
2595             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2596                 /* 8 bit code */
2597                 if (!estab_f && !iso8859_f) {
2598                     /* not established yet */
2599                     c2 = c1;
2600                     NEXT;
2601                 } else { /* estab_f==TRUE */
2602                     if (iso8859_f) {
2603                         c2 = ISO8859_1;
2604                         c1 &= 0x7f;
2605                         SEND;
2606                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2607                         /* SJIS X0201 Case... */
2608                         if(iso2022jp_f && x0201_f==NO_X0201) {
2609                             (*oconv)(GETA1, GETA2);
2610                             NEXT;
2611                         } else {
2612                             c2 = X0201;
2613                             c1 &= 0x7f;
2614                             SEND;
2615                         }
2616                     } else if (c1==SSO && iconv != s_iconv) {
2617                         /* EUC X0201 Case */
2618                         c1 = (*i_getc)(f);  /* skip SSO */
2619                         code_status(c1);
2620                         if (SSP<=c1 && c1<0xe0) {
2621                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2622                                 (*oconv)(GETA1, GETA2);
2623                                 NEXT;
2624                             } else {
2625                                 c2 = X0201;
2626                                 c1 &= 0x7f;
2627                                 SEND;
2628                             }
2629                         } else  { /* bogus code, skip SSO and one byte */
2630                             NEXT;
2631                         }
2632                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2633                                (c1 == 0xFD || c1 == 0xFE)) {
2634                         /* CP10001 */
2635                         c2 = X0201;
2636                         c1 &= 0x7f;
2637                         SEND;
2638                     } else {
2639                        /* already established */
2640                        c2 = c1;
2641                        NEXT;
2642                     }
2643                 }
2644             } else if ((c1 > SP) && (c1 != DEL)) {
2645                 /* in case of Roman characters */
2646                 if (shift_mode) {
2647                     /* output 1 shifted byte */
2648                     if (iso8859_f) {
2649                         c2 = ISO8859_1;
2650                         SEND;
2651                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2652                       /* output 1 shifted byte */
2653                         if(iso2022jp_f && x0201_f==NO_X0201) {
2654                             (*oconv)(GETA1, GETA2);
2655                             NEXT;
2656                         } else {
2657                             c2 = X0201;
2658                             SEND;
2659                         }
2660                     } else {
2661                         /* look like bogus code */
2662                         NEXT;
2663                     }
2664                 } else if (input_mode == X0208 || input_mode == X0212 ||
2665                            input_mode == X0213_1 || input_mode == X0213_2) {
2666                     /* in case of Kanji shifted */
2667                     c2 = c1;
2668                     NEXT;
2669                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2670                     /* Check MIME code */
2671                     if ((c1 = (*i_getc)(f)) == EOF) {
2672                         (*oconv)(0, '=');
2673                         LAST;
2674                     } else if (c1 == '?') {
2675                         /* =? is mime conversion start sequence */
2676                         if(mime_f == STRICT_MIME) {
2677                             /* check in real detail */
2678                             if (mime_begin_strict(f) == EOF)
2679                                 LAST;
2680                             else
2681                                 NEXT;
2682                         } else if (mime_begin(f) == EOF)
2683                             LAST;
2684                         else
2685                             NEXT;
2686                     } else {
2687                         (*oconv)(0, '=');
2688                         (*i_ungetc)(c1,f);
2689                         NEXT;
2690                     }
2691                 } else {
2692                     /* normal ASCII code */
2693                     SEND;
2694                 }
2695             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2696                 shift_mode = FALSE;
2697                 NEXT;
2698             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2699                 shift_mode = TRUE;
2700                 NEXT;
2701             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2702                 if ((c1 = (*i_getc)(f)) == EOF) {
2703                     /*  (*oconv)(0, ESC); don't send bogus code */
2704                     LAST;
2705                 } else if (c1 == '$') {
2706                     if ((c1 = (*i_getc)(f)) == EOF) {
2707                         /*
2708                         (*oconv)(0, ESC); don't send bogus code
2709                         (*oconv)(0, '$'); */
2710                         LAST;
2711                     } else if (c1 == '@'|| c1 == 'B') {
2712                         /* This is kanji introduction */
2713                         input_mode = X0208;
2714                         shift_mode = FALSE;
2715                         set_input_codename("ISO-2022-JP");
2716 #ifdef CHECK_OPTION
2717                         debug("ISO-2022-JP");
2718 #endif
2719                         NEXT;
2720                     } else if (c1 == '(') {
2721                         if ((c1 = (*i_getc)(f)) == EOF) {
2722                             /* don't send bogus code
2723                             (*oconv)(0, ESC);
2724                             (*oconv)(0, '$');
2725                             (*oconv)(0, '(');
2726                                 */
2727                             LAST;
2728                         } else if (c1 == '@'|| c1 == 'B') {
2729                             /* This is kanji introduction */
2730                             input_mode = X0208;
2731                             shift_mode = FALSE;
2732                             NEXT;
2733 #ifdef X0212_ENABLE
2734                         } else if (c1 == 'D'){
2735                             input_mode = X0212;
2736                             shift_mode = FALSE;
2737                             NEXT;
2738 #endif /* X0212_ENABLE */
2739                         } else if (c1 == (X0213_1&0x7F)){
2740                             input_mode = X0213_1;
2741                             shift_mode = FALSE;
2742                             NEXT;
2743                         } else if (c1 == (X0213_2&0x7F)){
2744                             input_mode = X0213_2;
2745                             shift_mode = FALSE;
2746                             NEXT;
2747                         } else {
2748                             /* could be some special code */
2749                             (*oconv)(0, ESC);
2750                             (*oconv)(0, '$');
2751                             (*oconv)(0, '(');
2752                             (*oconv)(0, c1);
2753                             NEXT;
2754                         }
2755                     } else if (broken_f&0x2) {
2756                         /* accept any ESC-(-x as broken code ... */
2757                         input_mode = X0208;
2758                         shift_mode = FALSE;
2759                         NEXT;
2760                     } else {
2761                         (*oconv)(0, ESC);
2762                         (*oconv)(0, '$');
2763                         (*oconv)(0, c1);
2764                         NEXT;
2765                     }
2766                 } else if (c1 == '(') {
2767                     if ((c1 = (*i_getc)(f)) == EOF) {
2768                         /* don't send bogus code
2769                         (*oconv)(0, ESC);
2770                         (*oconv)(0, '('); */
2771                         LAST;
2772                     } else {
2773                         if (c1 == 'I') {
2774                             /* This is X0201 kana introduction */
2775                             input_mode = X0201; shift_mode = X0201;
2776                             NEXT;
2777                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2778                             /* This is X0208 kanji introduction */
2779                             input_mode = ASCII; shift_mode = FALSE;
2780                             NEXT;
2781                         } else if (broken_f&0x2) {
2782                             input_mode = ASCII; shift_mode = FALSE;
2783                             NEXT;
2784                         } else {
2785                             (*oconv)(0, ESC);
2786                             (*oconv)(0, '(');
2787                             /* maintain various input_mode here */
2788                             SEND;
2789                         }
2790                     }
2791                } else if ( c1 == 'N' || c1 == 'n'){
2792                    /* SS2 */
2793                    c3 = (*i_getc)(f);  /* skip SS2 */
2794                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2795                        c1 = c3;
2796                        c2 = X0201;
2797                        SEND;
2798                    }else{
2799                        (*i_ungetc)(c3, f);
2800                        /* lonely ESC  */
2801                        (*oconv)(0, ESC);
2802                        SEND;
2803                    }
2804                 } else {
2805                     /* lonely ESC  */
2806                     (*oconv)(0, ESC);
2807                     SEND;
2808                 }
2809             } else if (c1 == ESC && iconv == s_iconv) {
2810                 /* ESC in Shift_JIS */
2811                 if ((c1 = (*i_getc)(f)) == EOF) {
2812                     /*  (*oconv)(0, ESC); don't send bogus code */
2813                     LAST;
2814                 } else if (c1 == '$') {
2815                     /* J-PHONE emoji */
2816                     if ((c1 = (*i_getc)(f)) == EOF) {
2817                         /*
2818                            (*oconv)(0, ESC); don't send bogus code
2819                            (*oconv)(0, '$'); */
2820                         LAST;
2821                     } else {
2822                         if (('E' <= c1 && c1 <= 'G') ||
2823                             ('O' <= c1 && c1 <= 'Q')) {
2824                             /*
2825                                NUM : 0 1 2 3 4 5
2826                                BYTE: G E F O P Q
2827                                C%7 : 1 6 0 2 3 4
2828                                C%7 : 0 1 2 3 4 5 6
2829                                NUM : 2 0 3 4 5 X 1
2830                              */
2831                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2832                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2833                             while ((c1 = (*i_getc)(f)) != EOF) {
2834                                 if (SP <= c1 && c1 <= 'z') {
2835                                     (*oconv)(0, c1 + c0);
2836                                 } else break; /* c1 == SO */
2837                             }
2838                         }
2839                     }
2840                     if (c1 == EOF) LAST;
2841                     NEXT;
2842                 } else {
2843                     /* lonely ESC  */
2844                     (*oconv)(0, ESC);
2845                     SEND;
2846                 }
2847             } else if (c1 == LF || c1 == CR) {
2848                 if (broken_f&4) {
2849                     input_mode = ASCII; set_iconv(FALSE, 0);
2850                     SEND;
2851                 } else if (mime_decode_f && !mime_decode_mode){
2852                     if (c1 == LF) {
2853                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2854                             i_ungetc(SP,f);
2855                             continue;
2856                         } else {
2857                             i_ungetc(c1,f);
2858                         }
2859                         c1 = LF;
2860                         SEND;
2861                     } else  { /* if (c1 == CR)*/
2862                         if ((c1=(*i_getc)(f))!=EOF) {
2863                             if (c1==SP) {
2864                                 i_ungetc(SP,f);
2865                                 continue;
2866                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2867                                 i_ungetc(SP,f);
2868                                 continue;
2869                             } else {
2870                                 i_ungetc(c1,f);
2871                             }
2872                             i_ungetc(LF,f);
2873                         } else {
2874                             i_ungetc(c1,f);
2875                         }
2876                         c1 = CR;
2877                         SEND;
2878                     }
2879                 }
2880             } else if (c1 == DEL && input_mode == X0208) {
2881                 /* CP5022x */
2882                 c2 = c1;
2883                 NEXT;
2884             } else
2885                 SEND;
2886         }
2887         /* send: */
2888         switch(input_mode){
2889         case ASCII:
2890             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2891             case -2:
2892                 /* 4 bytes UTF-8 */
2893                 if ((c0 = (*i_getc)(f)) != EOF) {
2894                     code_status(c0);
2895                     c0 <<= 8;
2896                     if ((c3 = (*i_getc)(f)) != EOF) {
2897                         code_status(c3);
2898                         (*iconv)(c2, c1, c0|c3);
2899                     }
2900                 }
2901                 break;
2902             case -1:
2903                 /* 3 bytes EUC or UTF-8 */
2904                 if ((c0 = (*i_getc)(f)) != EOF) {
2905                     code_status(c0);
2906                     (*iconv)(c2, c1, c0);
2907                 }
2908                 break;
2909             }
2910             break;
2911         case X0208:
2912         case X0213_1:
2913             if (ms_ucs_map_f &&
2914                 0x7F <= c2 && c2 <= 0x92 &&
2915                 0x21 <= c1 && c1 <= 0x7E) {
2916                 /* CP932 UDC */
2917                 if(c1 == 0x7F) return 0;
2918                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2919                 c2 = 0;
2920             }
2921             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2922             break;
2923 #ifdef X0212_ENABLE
2924         case X0212:
2925             (*oconv)(PREFIX_EUCG3 | c2, c1);
2926             break;
2927 #endif /* X0212_ENABLE */
2928         case X0213_2:
2929             (*oconv)(PREFIX_EUCG3 | c2, c1);
2930             break;
2931         default:
2932             (*oconv)(input_mode, c1);  /* other special case */
2933         }
2934
2935         c2 = 0;
2936         c0 = 0;
2937         continue;
2938         /* goto next_word */
2939     }
2940
2941     /* epilogue */
2942     (*iconv)(EOF, 0, 0);
2943     if (!input_codename)
2944     {
2945         if (is_8bit) {
2946             struct input_code *p = input_code_list;
2947             struct input_code *result = p;
2948             while (p->name){
2949                 if (p->score < result->score) result = p;
2950                 ++p;
2951             }
2952             set_input_codename(result->name);
2953 #ifdef CHECK_OPTION
2954             debug(result->name);
2955 #endif
2956         }
2957     }
2958     return 1;
2959 }
2960
2961 nkf_char
2962 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2963 {
2964     nkf_char ret, c3, c0;
2965     int hold_index;
2966
2967
2968     /** it must NOT be in the kanji shifte sequence      */
2969     /** it must NOT be written in JIS7                   */
2970     /** and it must be after 2 byte 8bit code            */
2971
2972     hold_count = 0;
2973     push_hold_buf(c2);
2974     push_hold_buf(c1);
2975
2976     while ((c1 = (*i_getc)(f)) != EOF) {
2977         if (c1 == ESC){
2978             (*i_ungetc)(c1,f);
2979             break;
2980         }
2981         code_status(c1);
2982         if (push_hold_buf(c1) == EOF || estab_f){
2983             break;
2984         }
2985     }
2986
2987     if (!estab_f){
2988         struct input_code *p = input_code_list;
2989         struct input_code *result = p;
2990         if (c1 == EOF){
2991             code_status(c1);
2992         }
2993         while (p->name){
2994             if (p->status_func && p->score < result->score){
2995                 result = p;
2996             }
2997             ++p;
2998         }
2999         set_iconv(TRUE, result->iconv_func);
3000     }
3001
3002
3003     /** now,
3004      ** 1) EOF is detected, or
3005      ** 2) Code is established, or
3006      ** 3) Buffer is FULL (but last word is pushed)
3007      **
3008      ** in 1) and 3) cases, we continue to use
3009      ** Kanji codes by oconv and leave estab_f unchanged.
3010      **/
3011
3012     ret = c1;
3013     hold_index = 0;
3014     while (hold_index < hold_count){
3015         c2 = hold_buf[hold_index++];
3016         if (c2 <= DEL
3017 #ifdef NUMCHAR_OPTION
3018             || is_unicode_capsule(c2)
3019 #endif
3020             ){
3021             (*iconv)(0, c2, 0);
3022             continue;
3023         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3024             (*iconv)(X0201, c2, 0);
3025             continue;
3026         }
3027         if (hold_index < hold_count){
3028             c1 = hold_buf[hold_index++];
3029         }else{
3030             c1 = (*i_getc)(f);
3031             if (c1 == EOF){
3032                 c3 = EOF;
3033                 break;
3034             }
3035             code_status(c1);
3036         }
3037         c0 = 0;
3038         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3039         case -2:
3040             /* 4 bytes UTF-8 */
3041             if (hold_index < hold_count){
3042                 c0 = hold_buf[hold_index++];
3043             } else if ((c0 = (*i_getc)(f)) == EOF) {
3044                 ret = EOF;
3045                 break;
3046             } else {
3047                 code_status(c0);
3048                 c0 <<= 8;
3049                 if (hold_index < hold_count){
3050                     c3 = hold_buf[hold_index++];
3051                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3052                     c0 = ret = EOF;
3053                     break;
3054                 } else {
3055                     code_status(c3);
3056                     (*iconv)(c2, c1, c0|c3);
3057                 }
3058             }
3059             break;
3060         case -1:
3061             /* 3 bytes EUC or UTF-8 */
3062             if (hold_index < hold_count){
3063                 c0 = hold_buf[hold_index++];
3064             } else if ((c0 = (*i_getc)(f)) == EOF) {
3065                 ret = EOF;
3066                 break;
3067             } else {
3068                 code_status(c0);
3069             }
3070             (*iconv)(c2, c1, c0);
3071             break;
3072         }
3073         if (c0 == EOF) break;
3074     }
3075     return ret;
3076 }
3077
3078 nkf_char push_hold_buf(nkf_char c2)
3079 {
3080     if (hold_count >= HOLD_SIZE*2)
3081         return (EOF);
3082     hold_buf[hold_count++] = (unsigned char)c2;
3083     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3084 }
3085
3086 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3087 {
3088 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3089     nkf_char val;
3090 #endif
3091     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3092 #ifdef SHIFTJIS_CP932
3093     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3094         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3095         if (val){
3096             c2 = val >> 8;
3097             c1 = val & 0xff;
3098         }
3099     }
3100     if (cp932inv_f
3101         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3102         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3103         if (c){
3104             c2 = c >> 8;
3105             c1 = c & 0xff;
3106         }
3107     }
3108 #endif /* SHIFTJIS_CP932 */
3109 #ifdef X0212_ENABLE
3110     if (!x0213_f && is_ibmext_in_sjis(c2)){
3111         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3112         if (val){
3113             if (val > 0x7FFF){
3114                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3115                 c1 = val & 0xff;
3116             }else{
3117                 c2 = val >> 8;
3118                 c1 = val & 0xff;
3119             }
3120             if (p2) *p2 = c2;
3121             if (p1) *p1 = c1;
3122             return 0;
3123         }
3124     }
3125 #endif
3126     if(c2 >= 0x80){
3127         if(x0213_f && c2 >= 0xF0){
3128             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3129                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3130             }else{ /* 78<=k<=94 */
3131                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3132                 if (0x9E < c1) c2++;
3133             }
3134         }else{
3135             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3136             if (0x9E < c1) c2++;
3137         }
3138         if (c1 < 0x9F)
3139             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3140         else {
3141             c1 = c1 - 0x7E;
3142         }
3143     }
3144
3145 #ifdef X0212_ENABLE
3146     c2 = x0212_unshift(c2);
3147 #endif
3148     if (p2) *p2 = c2;
3149     if (p1) *p1 = c1;
3150     return 0;
3151 }
3152
3153 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3154 {
3155     if (c2 == X0201) {
3156         c1 &= 0x7f;
3157     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3158         /* NOP */
3159     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3160         /* CP932 UDC */
3161         if(c1 == 0x7F) return 0;
3162         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3163         c2 = 0;
3164     } else {
3165         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3166         if (ret) return ret;
3167     }
3168     (*oconv)(c2, c1);
3169     return 0;
3170 }
3171
3172 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3173 {
3174     if (c2 == X0201) {
3175         c1 &= 0x7f;
3176 #ifdef X0212_ENABLE
3177     }else if (c2 == 0x8f){
3178         if (c0 == 0){
3179             return -1;
3180         }
3181         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3182             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3183             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3184             c2 = 0;
3185         } else {
3186             c2 = (c2 << 8) | (c1 & 0x7f);
3187             c1 = c0 & 0x7f;
3188 #ifdef SHIFTJIS_CP932
3189             if (cp51932_f){
3190                 nkf_char s2, s1;
3191                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3192                     s2e_conv(s2, s1, &c2, &c1);
3193                     if (c2 < 0x100){
3194                         c1 &= 0x7f;
3195                         c2 &= 0x7f;
3196                     }
3197                 }
3198             }
3199 #endif /* SHIFTJIS_CP932 */
3200         }
3201 #endif /* X0212_ENABLE */
3202     } else if (c2 == SSO){
3203         c2 = X0201;
3204         c1 &= 0x7f;
3205     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3206         /* NOP */
3207     } else {
3208         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3209             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3210             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3211             c2 = 0;
3212         } else {
3213             c1 &= 0x7f;
3214             c2 &= 0x7f;
3215 #ifdef SHIFTJIS_CP932
3216             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3217                 nkf_char s2, s1;
3218                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3219                     s2e_conv(s2, s1, &c2, &c1);
3220                     if (c2 < 0x100){
3221                         c1 &= 0x7f;
3222                         c2 &= 0x7f;
3223                     }
3224                 }
3225             }
3226 #endif /* SHIFTJIS_CP932 */
3227         }
3228     }
3229     (*oconv)(c2, c1);
3230     return 0;
3231 }
3232
3233 #ifdef UTF8_INPUT_ENABLE
3234 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3235 {
3236     nkf_char ret = 0;
3237
3238     if (!c1){
3239         *p2 = 0;
3240         *p1 = c2;
3241     }else if (0xc0 <= c2 && c2 <= 0xef) {
3242         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3243 #ifdef NUMCHAR_OPTION
3244         if (ret > 0){
3245             if (p2) *p2 = 0;
3246             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3247             ret = 0;
3248         }
3249 #endif
3250     }
3251     return ret;
3252 }
3253
3254 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3255 {
3256     nkf_char ret = 0;
3257     static const char w_iconv_utf8_1st_byte[] =
3258     { /* 0xC0 - 0xFF */
3259         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3260         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3261         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3262         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3263
3264     if (c2 < 0 || 0xff < c2) {
3265     }else if (c2 == 0) { /* 0 : 1 byte*/
3266         c0 = 0;
3267     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3268         return 0;
3269     } else{
3270         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3271         case 21:
3272             if (c1 < 0x80 || 0xBF < c1) return 0;
3273             break;
3274         case 30:
3275             if (c0 == 0) return -1;
3276             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3277                 return 0;
3278             break;
3279         case 31:
3280         case 33:
3281             if (c0 == 0) return -1;
3282             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3283                 return 0;
3284             break;
3285         case 32:
3286             if (c0 == 0) return -1;
3287             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3288                 return 0;
3289             break;
3290         case 40:
3291             if (c0 == 0) return -2;
3292             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3293                 return 0;
3294             break;
3295         case 41:
3296             if (c0 == 0) return -2;
3297             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3298                 return 0;
3299             break;
3300         case 42:
3301             if (c0 == 0) return -2;
3302             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3303                 return 0;
3304             break;
3305         default:
3306             return 0;
3307             break;
3308         }
3309     }
3310     if (c2 == 0 || c2 == EOF){
3311     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3312         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3313         c2 = 0;
3314     } else {
3315         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3316     }
3317     if (ret == 0){
3318         (*oconv)(c2, c1);
3319     }
3320     return ret;
3321 }
3322 #endif
3323
3324 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3325 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3326 {
3327     val &= VALUE_MASK;
3328     if (val < 0x80){
3329         *p2 = val;
3330         *p1 = 0;
3331         *p0 = 0;
3332     }else if (val < 0x800){
3333         *p2 = 0xc0 | (val >> 6);
3334         *p1 = 0x80 | (val & 0x3f);
3335         *p0 = 0;
3336     } else if (val <= NKF_INT32_C(0xFFFF)) {
3337         *p2 = 0xe0 | (val >> 12);
3338         *p1 = 0x80 | ((val >> 6) & 0x3f);
3339         *p0 = 0x80 | (val        & 0x3f);
3340     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3341         *p2 = 0xe0 |  (val >> 16);
3342         *p1 = 0x80 | ((val >> 12) & 0x3f);
3343         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3344     } else {
3345         *p2 = 0;
3346         *p1 = 0;
3347         *p0 = 0;
3348     }
3349 }
3350 #endif
3351
3352 #ifdef UTF8_INPUT_ENABLE
3353 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3354 {
3355     nkf_char val;
3356     if (c2 >= 0xf8) {
3357         val = -1;
3358     } else if (c2 >= 0xf0){
3359         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3360         val = (c2 & 0x0f) << 18;
3361         val |= (c1 & 0x3f) << 12;
3362         val |= (c0 & 0x3f00) >> 2;
3363         val |= (c0 & 0x3f);
3364     }else if (c2 >= 0xe0){
3365         val = (c2 & 0x0f) << 12;
3366         val |= (c1 & 0x3f) << 6;
3367         val |= (c0 & 0x3f);
3368     }else if (c2 >= 0xc0){
3369         val = (c2 & 0x1f) << 6;
3370         val |= (c1 & 0x3f);
3371     }else{
3372         val = c2;
3373     }
3374     return val;
3375 }
3376
3377 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3378 {
3379     nkf_char c2, c1, c0;
3380     nkf_char ret = 0;
3381     val &= VALUE_MASK;
3382     if (val < 0x80){
3383         *p2 = 0;
3384         *p1 = val;
3385     }else{
3386         w16w_conv(val, &c2, &c1, &c0);
3387         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3388 #ifdef NUMCHAR_OPTION
3389         if (ret > 0){
3390             *p2 = 0;
3391             *p1 = CLASS_UNICODE | val;
3392             ret = 0;
3393         }
3394 #endif
3395     }
3396     return ret;
3397 }
3398 #endif
3399
3400 #ifdef UTF8_INPUT_ENABLE
3401 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3402 {
3403     nkf_char ret = 0;
3404     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3405         (*oconv)(c2, c1);
3406         return 0;
3407     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3408         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3409             return -2;
3410         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3411         c2 = 0;
3412     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3413         /*
3414            return 2;
3415         */
3416         return 1;
3417     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3418     if (ret) return ret;
3419     (*oconv)(c2, c1);
3420     return 0;
3421 }
3422
3423 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3424 {
3425     int ret = 0;
3426
3427     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3428     } else if (is_unicode_bmp(c1)) {
3429         ret = w16e_conv(c1, &c2, &c1);
3430     } else {
3431         c2 = 0;
3432         c1 =  CLASS_UNICODE | c1;
3433     }
3434     if (ret) return ret;
3435     (*oconv)(c2, c1);
3436     return 0;
3437 }
3438
3439 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3440 {
3441     const unsigned short *const *pp;
3442     const unsigned short *const *const *ppp;
3443     static const char no_best_fit_chars_table_C2[] =
3444     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3445         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3446         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3447         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3448     static const char no_best_fit_chars_table_C2_ms[] =
3449     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3450         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3451         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3452         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3453     static const char no_best_fit_chars_table_932_C2[] =
3454     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3455         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3456         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3457         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3458     static const char no_best_fit_chars_table_932_C3[] =
3459     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3460         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3461         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3462         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3463     nkf_char ret = 0;
3464
3465     if(c2 < 0x80){
3466         *p2 = 0;
3467         *p1 = c2;
3468     }else if(c2 < 0xe0){
3469         if(no_best_fit_chars_f){
3470             if(ms_ucs_map_f == UCS_MAP_CP932){
3471                 switch(c2){
3472                 case 0xC2:
3473                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3474                     break;
3475                 case 0xC3:
3476                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3477                     break;
3478                 }
3479             }else if(!cp932inv_f){
3480                 switch(c2){
3481                 case 0xC2:
3482                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3483                     break;
3484                 case 0xC3:
3485                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3486                     break;
3487                 }
3488             }else if(ms_ucs_map_f == UCS_MAP_MS){
3489                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3490             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3491                 switch(c2){
3492                 case 0xC2:
3493                     switch(c1){
3494                     case 0xA2:
3495                     case 0xA3:
3496                     case 0xA5:
3497                     case 0xA6:
3498                     case 0xAC:
3499                     case 0xAF:
3500                     case 0xB8:
3501                         return 1;
3502                     }
3503                     break;
3504                 }
3505             }
3506         }
3507         pp =
3508             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3509             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3510             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3511             utf8_to_euc_2bytes;
3512         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3513     }else if(c0 < 0xF0){
3514         if(no_best_fit_chars_f){
3515             if(ms_ucs_map_f == UCS_MAP_CP932){
3516                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3517             }else if(ms_ucs_map_f == UCS_MAP_MS){
3518                 switch(c2){
3519                 case 0xE2:
3520                     switch(c1){
3521                     case 0x80:
3522                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3523                         break;
3524                     case 0x88:
3525                         if(c0 == 0x92) return 1;
3526                         break;
3527                     }
3528                     break;
3529                 case 0xE3:
3530                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3531                     break;
3532                 }
3533             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3534                 switch(c2){
3535                 case 0xE3:
3536                     switch(c1){
3537                     case 0x82:
3538                             if(c0 == 0x94) return 1;
3539                         break;
3540                     case 0x83:
3541                             if(c0 == 0xBB) return 1;
3542                         break;
3543                     }
3544                     break;
3545                 }
3546             }else{
3547                 switch(c2){
3548                 case 0xE2:
3549                     switch(c1){
3550                     case 0x80:
3551                         if(c0 == 0x95) return 1;
3552                         break;
3553                     case 0x88:
3554                         if(c0 == 0xA5) return 1;
3555                         break;
3556                     }
3557                     break;
3558                 case 0xEF:
3559                     switch(c1){
3560                     case 0xBC:
3561                         if(c0 == 0x8D) return 1;
3562                         break;
3563                     case 0xBD:
3564                         if(c0 == 0x9E && !cp932inv_f) return 1;
3565                         break;
3566                     case 0xBF:
3567                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3568                         break;
3569                     }
3570                     break;
3571                 }
3572             }
3573         }
3574         ppp =
3575             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3576             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3577             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3578             utf8_to_euc_3bytes;
3579         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3580     }else return -1;
3581 #ifdef SHIFTJIS_CP932
3582     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3583         nkf_char s2, s1;
3584         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3585             s2e_conv(s2, s1, p2, p1);
3586         }else{
3587             ret = 1;
3588         }
3589     }
3590 #endif
3591     return ret;
3592 }
3593
3594 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3595 {
3596     nkf_char c2;
3597     const unsigned short *p;
3598     unsigned short val;
3599
3600     if (pp == 0) return 1;
3601
3602     c1 -= 0x80;
3603     if (c1 < 0 || psize <= c1) return 1;
3604     p = pp[c1];
3605     if (p == 0)  return 1;
3606
3607     c0 -= 0x80;
3608     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3609     val = p[c0];
3610     if (val == 0) return 1;
3611     if (no_cp932ext_f && (
3612         (val>>8) == 0x2D || /* NEC special characters */
3613         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3614         )) return 1;
3615
3616     c2 = val >> 8;
3617    if (val > 0x7FFF){
3618         c2 &= 0x7f;
3619         c2 |= PREFIX_EUCG3;
3620     }
3621     if (c2 == SO) c2 = X0201;
3622     c1 = val & 0x7f;
3623     if (p2) *p2 = c2;
3624     if (p1) *p1 = c1;
3625     return 0;
3626 }
3627
3628 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3629 {
3630     int shift = 20;
3631     c &= VALUE_MASK;
3632     while(shift >= 0){
3633         if(c >= 1<<shift){
3634             while(shift >= 0){
3635                 (*f)(0, bin2hex(c>>shift));
3636                 shift -= 4;
3637             }
3638         }else{
3639             shift -= 4;
3640         }
3641     }
3642     return;
3643 }
3644
3645 void encode_fallback_html(nkf_char c)
3646 {
3647     (*oconv)(0, '&');
3648     (*oconv)(0, '#');
3649     c &= VALUE_MASK;
3650     if(c >= NKF_INT32_C(1000000))
3651         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3652     if(c >= NKF_INT32_C(100000))
3653         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3654     if(c >= 10000)
3655         (*oconv)(0, 0x30+(c/10000  )%10);
3656     if(c >= 1000)
3657         (*oconv)(0, 0x30+(c/1000   )%10);
3658     if(c >= 100)
3659         (*oconv)(0, 0x30+(c/100    )%10);
3660     if(c >= 10)
3661         (*oconv)(0, 0x30+(c/10     )%10);
3662     if(c >= 0)
3663         (*oconv)(0, 0x30+ c         %10);
3664     (*oconv)(0, ';');
3665     return;
3666 }
3667
3668 void encode_fallback_xml(nkf_char c)
3669 {
3670     (*oconv)(0, '&');
3671     (*oconv)(0, '#');
3672     (*oconv)(0, 'x');
3673     nkf_each_char_to_hex(oconv, c);
3674     (*oconv)(0, ';');
3675     return;
3676 }
3677
3678 void encode_fallback_java(nkf_char c)
3679 {
3680     (*oconv)(0, '\\');
3681     c &= VALUE_MASK;
3682     if(!is_unicode_bmp(c)){
3683         (*oconv)(0, 'U');
3684         (*oconv)(0, '0');
3685         (*oconv)(0, '0');
3686         (*oconv)(0, bin2hex(c>>20));
3687         (*oconv)(0, bin2hex(c>>16));
3688     }else{
3689         (*oconv)(0, 'u');
3690     }
3691     (*oconv)(0, bin2hex(c>>12));
3692     (*oconv)(0, bin2hex(c>> 8));
3693     (*oconv)(0, bin2hex(c>> 4));
3694     (*oconv)(0, bin2hex(c    ));
3695     return;
3696 }
3697
3698 void encode_fallback_perl(nkf_char c)
3699 {
3700     (*oconv)(0, '\\');
3701     (*oconv)(0, 'x');
3702     (*oconv)(0, '{');
3703     nkf_each_char_to_hex(oconv, c);
3704     (*oconv)(0, '}');
3705     return;
3706 }
3707
3708 void encode_fallback_subchar(nkf_char c)
3709 {
3710     c = unicode_subchar;
3711     (*oconv)((c>>8)&0xFF, c&0xFF);
3712     return;
3713 }
3714 #endif
3715
3716 #ifdef UTF8_OUTPUT_ENABLE
3717 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3718 {
3719     const unsigned short *p;
3720
3721     if (c2 == X0201) {
3722         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3723             switch (c1) {
3724             case 0x20:
3725                 return 0xA0;
3726             case 0x7D:
3727                 return 0xA9;
3728             }
3729         }
3730         p = euc_to_utf8_1byte;
3731 #ifdef X0212_ENABLE
3732     } else if (is_eucg3(c2)){
3733         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3734             return 0xA6;
3735         }
3736         c2 = (c2&0x7f) - 0x21;
3737         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3738             p = x0212_to_utf8_2bytes[c2];
3739         else
3740             return 0;
3741 #endif
3742     } else {
3743         c2 &= 0x7f;
3744         c2 = (c2&0x7f) - 0x21;
3745         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3746             p =
3747                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3748                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3749                 euc_to_utf8_2bytes_ms[c2];
3750         else
3751             return 0;
3752     }
3753     if (!p) return 0;
3754     c1 = (c1 & 0x7f) - 0x21;
3755     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3756         return p[c1];
3757     return 0;
3758 }
3759
3760 void w_oconv(nkf_char c2, nkf_char c1)
3761 {
3762     nkf_char c0;
3763     nkf_char val;
3764
3765     if (output_bom_f) {
3766         output_bom_f = FALSE;
3767         (*o_putc)('\357');
3768         (*o_putc)('\273');
3769         (*o_putc)('\277');
3770     }
3771
3772     if (c2 == EOF) {
3773         (*o_putc)(EOF);
3774         return;
3775     }
3776
3777 #ifdef NUMCHAR_OPTION
3778     if (c2 == 0 && is_unicode_capsule(c1)){
3779         val = c1 & VALUE_MASK;
3780         if (val < 0x80){
3781             (*o_putc)(val);
3782         }else if (val < 0x800){
3783             (*o_putc)(0xC0 | (val >> 6));
3784             (*o_putc)(0x80 | (val & 0x3f));
3785         } else if (val <= NKF_INT32_C(0xFFFF)) {
3786             (*o_putc)(0xE0 | (val >> 12));
3787             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3788             (*o_putc)(0x80 | (val        & 0x3f));
3789         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3790             (*o_putc)(0xF0 | ( val>>18));
3791             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3792             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3793             (*o_putc)(0x80 | ( val      & 0x3f));
3794         }
3795         return;
3796     }
3797 #endif
3798
3799     if (c2 == 0) {
3800         output_mode = ASCII;
3801         (*o_putc)(c1);
3802     } else if (c2 == ISO8859_1) {
3803         output_mode = ISO8859_1;
3804         (*o_putc)(c1 | 0x080);
3805     } else {
3806         output_mode = UTF8;
3807         val = e2w_conv(c2, c1);
3808         if (val){
3809             w16w_conv(val, &c2, &c1, &c0);
3810             (*o_putc)(c2);
3811             if (c1){
3812                 (*o_putc)(c1);
3813                 if (c0) (*o_putc)(c0);
3814             }
3815         }
3816     }
3817 }
3818
3819 void w_oconv16(nkf_char c2, nkf_char c1)
3820 {
3821     if (output_bom_f) {
3822         output_bom_f = FALSE;
3823         if (output_endian == ENDIAN_LITTLE){
3824             (*o_putc)((unsigned char)'\377');
3825             (*o_putc)('\376');
3826         }else{
3827             (*o_putc)('\376');
3828             (*o_putc)((unsigned char)'\377');
3829         }
3830     }
3831
3832     if (c2 == EOF) {
3833         (*o_putc)(EOF);
3834         return;
3835     }
3836
3837     if (c2 == ISO8859_1) {
3838         c2 = 0;
3839         c1 |= 0x80;
3840 #ifdef NUMCHAR_OPTION
3841     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3842         if (is_unicode_bmp(c1)) {
3843             c2 = (c1 >> 8) & 0xff;
3844             c1 &= 0xff;
3845         } else {
3846             c1 &= VALUE_MASK;
3847             if (c1 <= UNICODE_MAX) {
3848                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3849                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3850                 if (output_endian == ENDIAN_LITTLE){
3851                     (*o_putc)(c2 & 0xff);
3852                     (*o_putc)((c2 >> 8) & 0xff);
3853                     (*o_putc)(c1 & 0xff);
3854                     (*o_putc)((c1 >> 8) & 0xff);
3855                 }else{
3856                     (*o_putc)((c2 >> 8) & 0xff);
3857                     (*o_putc)(c2 & 0xff);
3858                     (*o_putc)((c1 >> 8) & 0xff);
3859                     (*o_putc)(c1 & 0xff);
3860                 }
3861             }
3862             return;
3863         }
3864 #endif
3865     } else if (c2) {
3866         nkf_char val = e2w_conv(c2, c1);
3867         c2 = (val >> 8) & 0xff;
3868         c1 = val & 0xff;
3869         if (!val) return;
3870     }
3871     if (output_endian == ENDIAN_LITTLE){
3872         (*o_putc)(c1);
3873         (*o_putc)(c2);
3874     }else{
3875         (*o_putc)(c2);
3876         (*o_putc)(c1);
3877     }
3878 }
3879
3880 void w_oconv32(nkf_char c2, nkf_char c1)
3881 {
3882     if (output_bom_f) {
3883         output_bom_f = FALSE;
3884         if (output_endian == ENDIAN_LITTLE){
3885             (*o_putc)((unsigned char)'\377');
3886             (*o_putc)('\376');
3887             (*o_putc)('\000');
3888             (*o_putc)('\000');
3889         }else{
3890             (*o_putc)('\000');
3891             (*o_putc)('\000');
3892             (*o_putc)('\376');
3893             (*o_putc)((unsigned char)'\377');
3894         }
3895     }
3896
3897     if (c2 == EOF) {
3898         (*o_putc)(EOF);
3899         return;
3900     }
3901
3902     if (c2 == ISO8859_1) {
3903         c1 |= 0x80;
3904 #ifdef NUMCHAR_OPTION
3905     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3906         c1 &= VALUE_MASK;
3907 #endif
3908     } else if (c2) {
3909         c1 = e2w_conv(c2, c1);
3910         if (!c1) return;
3911     }
3912     if (output_endian == ENDIAN_LITTLE){
3913         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3914         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3915         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3916         (*o_putc)('\000');
3917     }else{
3918         (*o_putc)('\000');
3919         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3920         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3921         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3922     }
3923 }
3924 #endif
3925
3926 void e_oconv(nkf_char c2, nkf_char c1)
3927 {
3928 #ifdef NUMCHAR_OPTION
3929     if (c2 == 0 && is_unicode_capsule(c1)){
3930         w16e_conv(c1, &c2, &c1);
3931         if (c2 == 0 && is_unicode_capsule(c1)){
3932             c2 = c1 & VALUE_MASK;
3933             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
3934                 /* eucJP-ms UDC */
3935                 c1 &= 0xFFF;
3936                 c2 = c1 / 94;
3937                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
3938                 c1 = 0x21 + c1 % 94;
3939                 if (is_eucg3(c2)){
3940                     (*o_putc)(0x8f);
3941                     (*o_putc)((c2 & 0x7f) | 0x080);
3942                     (*o_putc)(c1 | 0x080);
3943                 }else{
3944                     (*o_putc)((c2 & 0x7f) | 0x080);
3945                     (*o_putc)(c1 | 0x080);
3946                 }
3947                 return;
3948             } else {
3949                 if (encode_fallback) (*encode_fallback)(c1);
3950                 return;
3951             }
3952         }
3953     }
3954 #endif
3955     if (c2 == EOF) {
3956         (*o_putc)(EOF);
3957         return;
3958     } else if (c2 == 0) {
3959         output_mode = ASCII;
3960         (*o_putc)(c1);
3961     } else if (c2 == X0201) {
3962         output_mode = JAPANESE_EUC;
3963         (*o_putc)(SSO); (*o_putc)(c1|0x80);
3964     } else if (c2 == ISO8859_1) {
3965         output_mode = ISO8859_1;
3966         (*o_putc)(c1 | 0x080);
3967 #ifdef X0212_ENABLE
3968     } else if (is_eucg3(c2)){
3969         output_mode = JAPANESE_EUC;
3970 #ifdef SHIFTJIS_CP932
3971         if (!cp932inv_f){
3972             nkf_char s2, s1;
3973             if (e2s_conv(c2, c1, &s2, &s1) == 0){
3974                 s2e_conv(s2, s1, &c2, &c1);
3975             }
3976         }
3977 #endif
3978         if (c2 == 0) {
3979             output_mode = ASCII;
3980             (*o_putc)(c1);
3981         }else if (is_eucg3(c2)){
3982             if (x0212_f){
3983                 (*o_putc)(0x8f);
3984                 (*o_putc)((c2 & 0x7f) | 0x080);
3985                 (*o_putc)(c1 | 0x080);
3986             }
3987         }else{
3988             (*o_putc)((c2 & 0x7f) | 0x080);
3989             (*o_putc)(c1 | 0x080);
3990         }
3991 #endif
3992     } else {
3993         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
3994             set_iconv(FALSE, 0);
3995             return; /* too late to rescue this char */
3996         }
3997         output_mode = JAPANESE_EUC;
3998         (*o_putc)(c2 | 0x080);
3999         (*o_putc)(c1 | 0x080);
4000     }
4001 }
4002
4003 #ifdef X0212_ENABLE
4004 nkf_char x0212_shift(nkf_char c)
4005 {
4006     nkf_char ret = c;
4007     c &= 0x7f;
4008     if (is_eucg3(ret)){
4009         if (0x75 <= c && c <= 0x7f){
4010             ret = c + (0x109 - 0x75);
4011         }
4012     }else{
4013         if (0x75 <= c && c <= 0x7f){
4014             ret = c + (0x113 - 0x75);
4015         }
4016     }
4017     return ret;
4018 }
4019
4020
4021 nkf_char x0212_unshift(nkf_char c)
4022 {
4023     nkf_char ret = c;
4024     if (0x7f <= c && c <= 0x88){
4025         ret = c + (0x75 - 0x7f);
4026     }else if (0x89 <= c && c <= 0x92){
4027         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4028     }
4029     return ret;
4030 }
4031 #endif /* X0212_ENABLE */
4032
4033 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4034 {
4035     nkf_char ndx;
4036     if (is_eucg3(c2)){
4037         ndx = c2 & 0x7f;
4038         if (x0213_f){
4039             if((0x21 <= ndx && ndx <= 0x2F)){
4040                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4041                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4042                 return 0;
4043             }else if(0x6E <= ndx && ndx <= 0x7E){
4044                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4045                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4046                 return 0;
4047             }
4048             return 1;
4049         }
4050 #ifdef X0212_ENABLE
4051         else if(nkf_isgraph(ndx)){
4052             nkf_char val = 0;
4053             const unsigned short *ptr;
4054             ptr = x0212_shiftjis[ndx - 0x21];
4055             if (ptr){
4056                 val = ptr[(c1 & 0x7f) - 0x21];
4057             }
4058             if (val){
4059                 c2 = val >> 8;
4060                 c1 = val & 0xff;
4061                 if (p2) *p2 = c2;
4062                 if (p1) *p1 = c1;
4063                 return 0;
4064             }
4065             c2 = x0212_shift(c2);
4066         }
4067 #endif /* X0212_ENABLE */
4068     }
4069     if(0x7F < c2) return 1;
4070     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4071     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4072     return 0;
4073 }
4074
4075 void s_oconv(nkf_char c2, nkf_char c1)
4076 {
4077 #ifdef NUMCHAR_OPTION
4078     if (c2 == 0 && is_unicode_capsule(c1)){
4079         w16e_conv(c1, &c2, &c1);
4080         if (c2 == 0 && is_unicode_capsule(c1)){
4081             c2 = c1 & VALUE_MASK;
4082             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4083                 /* CP932 UDC */
4084                 c1 &= 0xFFF;
4085                 c2 = c1 / 188 + 0xF0;
4086                 c1 = c1 % 188;
4087                 c1 += 0x40 + (c1 > 0x3e);
4088                 (*o_putc)(c2);
4089                 (*o_putc)(c1);
4090                 return;
4091             } else {
4092                 if(encode_fallback)(*encode_fallback)(c1);
4093                 return;
4094             }
4095         }
4096     }
4097 #endif
4098     if (c2 == EOF) {
4099         (*o_putc)(EOF);
4100         return;
4101     } else if (c2 == 0) {
4102         output_mode = ASCII;
4103         (*o_putc)(c1);
4104     } else if (c2 == X0201) {
4105         output_mode = SHIFT_JIS;
4106         (*o_putc)(c1|0x80);
4107     } else if (c2 == ISO8859_1) {
4108         output_mode = ISO8859_1;
4109         (*o_putc)(c1 | 0x080);
4110 #ifdef X0212_ENABLE
4111     } else if (is_eucg3(c2)){
4112         output_mode = SHIFT_JIS;
4113         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4114             (*o_putc)(c2);
4115             (*o_putc)(c1);
4116         }
4117 #endif
4118     } else {
4119         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4120             set_iconv(FALSE, 0);
4121             return; /* too late to rescue this char */
4122         }
4123         output_mode = SHIFT_JIS;
4124         e2s_conv(c2, c1, &c2, &c1);
4125
4126 #ifdef SHIFTJIS_CP932
4127         if (cp932inv_f
4128             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4129             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4130             if (c){
4131                 c2 = c >> 8;
4132                 c1 = c & 0xff;
4133             }
4134         }
4135 #endif /* SHIFTJIS_CP932 */
4136
4137         (*o_putc)(c2);
4138         if (prefix_table[(unsigned char)c1]){
4139             (*o_putc)(prefix_table[(unsigned char)c1]);
4140         }
4141         (*o_putc)(c1);
4142     }
4143 }
4144
4145 void j_oconv(nkf_char c2, nkf_char c1)
4146 {
4147 #ifdef NUMCHAR_OPTION
4148     if (c2 == 0 && is_unicode_capsule(c1)){
4149         w16e_conv(c1, &c2, &c1);
4150         if (c2 == 0 && is_unicode_capsule(c1)){
4151             c2 = c1 & VALUE_MASK;
4152             if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
4153                 /* CP5022x UDC */
4154                 c1 &= 0xFFF;
4155                 c2 = 0x7F + c1 / 94;
4156                 c1 = 0x21 + c1 % 94;
4157             } else {
4158                 if (encode_fallback) (*encode_fallback)(c1);
4159                 return;
4160             }
4161         }
4162     }
4163 #endif
4164     if (c2 == EOF) {
4165         if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4166             (*o_putc)(ESC);
4167             (*o_putc)('(');
4168             (*o_putc)(ascii_intro);
4169             output_mode = ASCII;
4170         }
4171         (*o_putc)(EOF);
4172 #ifdef X0212_ENABLE
4173     } else if (is_eucg3(c2)){
4174         if(x0213_f){
4175             if(output_mode!=X0213_2){
4176                 output_mode = X0213_2;
4177                 (*o_putc)(ESC);
4178                 (*o_putc)('$');
4179                 (*o_putc)('(');
4180                 (*o_putc)(X0213_2&0x7F);
4181             }
4182         }else{
4183             if(output_mode!=X0212){
4184                 output_mode = X0212;
4185                 (*o_putc)(ESC);
4186                 (*o_putc)('$');
4187                 (*o_putc)('(');
4188                 (*o_putc)(X0212&0x7F);
4189             }
4190         }
4191         (*o_putc)(c2 & 0x7f);
4192         (*o_putc)(c1);
4193 #endif
4194     } else if (c2==X0201) {
4195         if (output_mode!=X0201) {
4196             output_mode = X0201;
4197             (*o_putc)(ESC);
4198             (*o_putc)('(');
4199             (*o_putc)('I');
4200         }
4201         (*o_putc)(c1);
4202     } else if (c2==ISO8859_1) {
4203             /* iso8859 introduction, or 8th bit on */
4204             /* Can we convert in 7bit fo