OSDN Git Service

* Remove extra space.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.137 2007/10/01 14:29:21 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-10-01"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
/*
 * djgpp_setbinmode: switch the descriptor behind `fp` to binary mode.
 *
 * Only compiled when __DJGPP__ is defined. Reads the current mode word for
 * the descriptor from __file_handle_modes[], clears O_TEXT, sets O_BINARY,
 * and stores the result back with __file_handle_set().
 *
 * fp: stream whose descriptor (obtained via fileno()) is updated.
 */
82 void  djgpp_setbinmode(FILE *fp)
83 {
84     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
88     __file_handle_set(fd, m);
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
107 #ifdef OVERWRITE
108 /* added by satoru@isoternet.org */
109 #if defined(__EMX__)
110 #include <sys/types.h>
111 #endif
112 #include <sys/stat.h>
113 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
114 #include <unistd.h>
115 #if defined(__WATCOMC__)
116 #include <sys/utime.h>
117 #else
118 #include <utime.h>
119 #endif
120 #else /* defined(MSDOS) */
121 #ifdef __WIN32__
122 #ifdef __BORLANDC__ /* BCC32 */
123 #include <utime.h>
124 #else /* !defined(__BORLANDC__) */
125 #include <sys/utime.h>
126 #endif /* (__BORLANDC__) */
127 #else /* !defined(__WIN32__) */
128 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
129 #include <sys/utime.h>
130 #elif defined(__TURBOC__) /* BCC */
131 #include <utime.h>
132 #elif defined(LSI_C) /* LSI C */
133 #endif /* (__WIN32__) */
134 #endif
135 #endif
136 #endif
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
190 /* ASCII CODE */
191
192 #define         BS      0x08
193 #define         TAB     0x09
194 #define         LF      0x0a
195 #define         CR      0x0d
196 #define         ESC     0x1b
197 #define         SP      0x20
198 #define         AT      0x40
199 #define         SSP     0xa0
200 #define         DEL     0x7f
201 #define         SI      0x0f
202 #define         SO      0x0e
203 #define         SSO     0x8e
204 #define         SS3     0x8f
205 #define         CRLF    0x0D0A
206
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every use of a macro parameter is parenthesized so that compound
 * arguments (e.g. nkf_isdigit(c & 0xff)) expand with the intended operator
 * precedence.  Arguments may still be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; yields 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the high byte of the 16-bit value c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
226
/* Inclusive byte ranges used by the CP932 and inverse-CP932 tables. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END (inclusive).
 * The parameter is parenthesized so compound arguments such as
 * is_ibmext_in_sjis(c & 0xff) are compared as a whole.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
232
233 #define         HOLD_SIZE       1024
234 #if defined(INT_IS_SHORT)
235 #define         IOBUF_SIZE      2048
236 #else
237 #define         IOBUF_SIZE      16384
238 #endif
239
240 #define         DEFAULT_J       'B'
241 #define         DEFAULT_R       'B'
242
243 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
244 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
245
246 #define         RANGE_NUM_MAX   18
247 #define         GETA1   0x22
248 #define         GETA2   0x2e
249
250
251 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
252 #define sizeof_euc_to_utf8_1byte 94
253 #define sizeof_euc_to_utf8_2bytes 94
254 #define sizeof_utf8_to_euc_C2 64
255 #define sizeof_utf8_to_euc_E5B8 64
256 #define sizeof_utf8_to_euc_2bytes 112
257 #define sizeof_utf8_to_euc_3bytes 16
258 #endif
259
260 /* MIME preprocessor */
261
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
264 #endif
265
/*
 * Descriptor for one candidate input encoding; instances are collected in
 * input_code_list[].
 */
266 struct input_code{
267     char *name;          /* encoding label, e.g. "EUC-JP", "Shift_JIS" */
268     nkf_char stat;       /* NOTE(review): presumably state for status_func - confirm */
269     nkf_char score;      /* presumably updated via set_code_score()/clr_code_score() - confirm */
270     nkf_char index;      /* NOTE(review): presumably fill level of buf[] - confirm */
271     nkf_char buf[3];     /* small per-encoding scratch buffer */
272     void (*status_func)(struct input_code *, nkf_char);  /* per-encoding status callback; may be NULL */
273     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);  /* input converter for this encoding */
274     int _file_stat;      /* NOTE(review): purpose not visible here - confirm */
275 };
276
277 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
278
279 #ifndef PERL_XS
280 static const char *CopyRight = COPY_RIGHT;
281 #endif
282 #if !defined(PERL_XS) && !defined(WIN32DLL)
283 static  nkf_char     noconvert(FILE *f);
284 #endif
285 static  void    module_connection(void);
286 static  nkf_char     kanji_convert(FILE *f);
287 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
288 static  nkf_char     push_hold_buf(nkf_char c2);
289 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
290 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
291 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
292 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
293 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
294 /* UCS Mapping
295  * 0: Shift_JIS, eucJP-ascii
296  * 1: eucJP-ms
297  * 2: CP932, CP51932
298  * 3: CP10001
299  */
300 #define UCS_MAP_ASCII   0
301 #define UCS_MAP_MS      1
302 #define UCS_MAP_CP932   2
303 #define UCS_MAP_CP10001 3
304 static int ms_ucs_map_f = UCS_MAP_ASCII;
305 #endif
306 #ifdef UTF8_INPUT_ENABLE
307 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
308 static  int     no_cp932ext_f = FALSE;
309 /* ignore ZERO WIDTH NO-BREAK SPACE */
310 static  int     no_best_fit_chars_f = FALSE;
311 static  int     input_endian = ENDIAN_BIG;
312 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
313 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
314 static  void    encode_fallback_html(nkf_char c);
315 static  void    encode_fallback_xml(nkf_char c);
316 static  void    encode_fallback_java(nkf_char c);
317 static  void    encode_fallback_perl(nkf_char c);
318 static  void    encode_fallback_subchar(nkf_char c);
319 static  void    (*encode_fallback)(nkf_char c) = NULL;
320 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
321 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
322 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
323 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
324 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
325 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
326 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
327 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
328 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
329 static  void    w_status(struct input_code *, nkf_char);
330 #endif
331 #ifdef UTF8_OUTPUT_ENABLE
332 static  int     output_bom_f = FALSE;
333 static  int     output_endian = ENDIAN_BIG;
334 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
335 static  void    w_oconv(nkf_char c2,nkf_char c1);
336 static  void    w_oconv16(nkf_char c2,nkf_char c1);
337 static  void    w_oconv32(nkf_char c2,nkf_char c1);
338 #endif
339 static  void    e_oconv(nkf_char c2,nkf_char c1);
340 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  void    s_oconv(nkf_char c2,nkf_char c1);
342 static  void    j_oconv(nkf_char c2,nkf_char c1);
343 static  void    fold_conv(nkf_char c2,nkf_char c1);
344 static  void    nl_conv(nkf_char c2,nkf_char c1);
345 static  void    z_conv(nkf_char c2,nkf_char c1);
346 static  void    rot_conv(nkf_char c2,nkf_char c1);
347 static  void    hira_conv(nkf_char c2,nkf_char c1);
348 static  void    base64_conv(nkf_char c2,nkf_char c1);
349 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
350 static  void    no_connection(nkf_char c2,nkf_char c1);
351 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
352
353 static  void    code_score(struct input_code *ptr);
354 static  void    code_status(nkf_char c);
355
356 static  void    std_putc(nkf_char c);
357 static  nkf_char     std_getc(FILE *f);
358 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
359
360 static  nkf_char     broken_getc(FILE *f);
361 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     mime_begin(FILE *f);
364 static  nkf_char     mime_getc(FILE *f);
365 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
366
367 static  void    switch_mime_getc(void);
368 static  void    unswitch_mime_getc(void);
369 static  nkf_char     mime_begin_strict(FILE *f);
370 static  nkf_char     mime_getc_buf(FILE *f);
371 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
372 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
373
374 static  nkf_char     base64decode(nkf_char c);
375 static  void    mime_prechar(nkf_char c2, nkf_char c1);
376 static  void    mime_putc(nkf_char c);
377 static  void    open_mime(nkf_char c);
378 static  void    close_mime(void);
379 static  void    eof_mime(void);
380 static  void    mimeout_addchar(nkf_char c);
381 #ifndef PERL_XS
382 static  void    usage(void);
383 static  void    version(void);
384 #endif
385 static  void    options(unsigned char *c);
386 #if defined(PERL_XS) || defined(WIN32DLL)
387 static  void    reinit(void);
388 #endif
389
390 /* buffers */
391
392 #if !defined(PERL_XS) && !defined(WIN32DLL)
393 static unsigned char   stdibuf[IOBUF_SIZE];
394 static unsigned char   stdobuf[IOBUF_SIZE];
395 #endif
396 static unsigned char   hold_buf[HOLD_SIZE*2];
397 static int             hold_count = 0;
398
399 /* MIME preprocessor fifo */
400
401 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
402 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
403 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
404 static unsigned char           mime_buf[MIME_BUF_SIZE];
405 static unsigned int            mime_top = 0;
406 static unsigned int            mime_last = 0;  /* decoded */
407 static unsigned int            mime_input = 0; /* undecoded */
408 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
409
410 /* flags: file-scope option switches with their default values */
411 static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
412 static int             estab_f = FALSE;
413 static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
414 static int             binmode_f = TRUE;       /* binary mode: main() applies setbinmode() to its streams */
415 static int             rot_f = FALSE;          /* rot14/43 mode (NOTE(review): probably means ROT13/47 - confirm) */
416 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
417 static int             input_f = FALSE;        /* non fixed input code  */
418 static int             alpha_f = FALSE;        /* convert JISX0208 alphabet to ASCII */
419 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
420 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
421 static int             mimebuf_f = FALSE;      /* MIME buffered input */
422 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
423 static int             iso8859_f = FALSE;      /* ISO8859 through */
424 static int             mimeout_f = FALSE;       /* base64 mode */
/* x0201_f may also hold WISH_TRUE; main() resolves that to TRUE or NO_X0201 depending on iso2022jp_f */
425 #if defined(MSDOS) || defined(__OS2__)
426 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
427 #else
428 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
429 #endif
430 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
431
432 #ifdef UNICODE_NORMALIZATION
433 static int nfc_f = FALSE;
434 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
435 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
436 static nkf_char nfc_getc(FILE *f);
437 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
438 #endif
439
440 #ifdef INPUT_OPTION
441 static int cap_f = FALSE;
442 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
443 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
444 static nkf_char cap_getc(FILE *f);
445 static nkf_char cap_ungetc(nkf_char c,FILE *f);
446
447 static int url_f = FALSE;
448 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
449 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
450 static nkf_char url_getc(FILE *f);
451 static nkf_char url_ungetc(nkf_char c,FILE *f);
452 #endif
453
/*
 * NKF_INT32_C(n): spell an integer constant that needs at least 32 bits.
 * When INT_IS_SHORT is defined an L suffix is appended; otherwise the
 * constant is used as written.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)   /* top byte: class tag */
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)   /* class tag value for Unicode */
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)   /* low 24 bits: payload */
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * The parameter is parenthesized so that compound arguments such as
 * is_unicode_bmp(CLASS_UNICODE | v) are masked as a whole rather than
 * having the mask bind to the last operand only.
 */
/* True when the class tag of c is CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
466
467 #ifdef NUMCHAR_OPTION
468 static int numchar_f = FALSE;
469 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
470 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
471 static nkf_char numchar_getc(FILE *f);
472 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
473 #endif
474
475 #ifdef CHECK_OPTION
476 static int noout_f = FALSE;
477 static void no_putc(nkf_char c);
478 static nkf_char debug_f = FALSE;
479 static void debug(const char *str);
480 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
481 #endif
482
483 static int guess_f = FALSE;
484 #if !defined PERL_XS
485 static  void    print_guessed_code(char *filename);
486 #endif
487 static  void    set_input_codename(char *codename);
488 static int is_inputcode_mixed = FALSE;
489
490 #ifdef EXEC_IO
491 static int exec_f = 0;
492 #endif
493
494 #ifdef SHIFTJIS_CP932
495 /* invert IBM extended characters to others */
496 static int cp51932_f = FALSE;
497
498 /* invert NEC-selected IBM extended characters to IBM extended characters */
499 static int cp932inv_f = TRUE;
500
501 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
502 #endif /* SHIFTJIS_CP932 */
503
504 #ifdef X0212_ENABLE
505 static int x0212_f = FALSE;
506 static nkf_char x0212_shift(nkf_char c);
507 static nkf_char x0212_unshift(nkf_char c);
508 #endif
509 static int x0213_f = FALSE;
510
511 static unsigned char prefix_table[256];
512
513 static void set_code_score(struct input_code *ptr, nkf_char score);
514 static void clr_code_score(struct input_code *ptr, nkf_char score);
515 static void status_disable(struct input_code *ptr);
516 static void status_push_ch(struct input_code *ptr, nkf_char c);
517 static void status_clear(struct input_code *ptr);
518 static void status_reset(struct input_code *ptr);
519 static void status_reinit(struct input_code *ptr);
520 static void status_check(struct input_code *ptr, nkf_char c);
521 static void e_status(struct input_code *, nkf_char);
522 static void s_status(struct input_code *, nkf_char);
523
/*
 * Table of known input encodings, one struct input_code per entry:
 * name, zeroed stat/score/index/buf, status callback, converter, _file_stat.
 * The UTF-8/16/32 rows exist only when UTF8_INPUT_ENABLE is defined, and the
 * UTF-16/UTF-32 rows have no status callback (NULL).
 * The list ends with an all-zero sentinel entry (name == 0).
 */
524 struct input_code input_code_list[] = {
525     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
526     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
527 #ifdef UTF8_INPUT_ENABLE
528     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
529     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
530     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
531 #endif
532     {0}   /* sentinel */
533 };
534
535 static int              mimeout_mode = 0;
536 static int              base64_count = 0;
537
538 /* X0208 -> ASCII converter */
539
540 /* fold parameter */
541 static int             f_line = 0;    /* chars in line */
542 static int             f_prev = 0;
543 static int             fold_preserve_f = FALSE; /* preserve new lines */
544 static int             fold_f  = FALSE;
545 static int             fold_len  = 0;
546
547 /* options */
548 static unsigned char   kanji_intro = DEFAULT_J;
549 static unsigned char   ascii_intro = DEFAULT_R;
550
551 /* Folding */
552
553 #define FOLD_MARGIN  10
554 #define DEFAULT_FOLD 60
555
556 static int             fold_margin  = FOLD_MARGIN;
557
558 /* converters */
559
560 #ifdef DEFAULT_CODE_JIS
561 #   define  DEFAULT_CONV j_oconv
562 #endif
563 #ifdef DEFAULT_CODE_SJIS
564 #   define  DEFAULT_CONV s_oconv
565 #endif
566 #ifdef DEFAULT_CODE_EUC
567 #   define  DEFAULT_CONV e_oconv
568 #endif
569 #ifdef DEFAULT_CODE_UTF8
570 #   define  DEFAULT_CONV w_oconv
571 #endif
572
573 /* process default */
574 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
575
576 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
577 /* s_iconv or oconv */
578 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
579
580 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
587
588 /* static redirections */
589
590 static  void   (*o_putc)(nkf_char c) = std_putc;
591
592 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
593 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
594
595 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
596 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
597
598 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
599
600 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
601 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
602
603 /* for strict mime */
604 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
605 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
606
607 /* Global states */
608 static int output_mode = ASCII,    /* output kanji mode */
609            input_mode =  ASCII,    /* input kanji mode */
610            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
611 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
612
613 /* X0201 / X0208 conversion tables */
614
615 /* X0201 kana conversion table */
616 /* 90-9F A0-DF */
617 static const unsigned char cv[]= {
618     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
619     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
620     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
621     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
622     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
623     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
624     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
625     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
626     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
627     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
628     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
629     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
630     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
631     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
632     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
633     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
634     0x00,0x00};
635
636
637 /* X0201 kana conversion table for dakuten */
638 /* 90-9F A0-DF */
639 static const unsigned char dv[]= {
640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
645     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
646     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
647     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
648     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
649     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
650     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
651     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656     0x00,0x00};
657
658 /* X0201 kana conversion table for han-dakuten */
659 /* 90-9F A0-DF */
660 static const unsigned char ev[]= {
661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
672     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677     0x00,0x00};
678
679
680 /* X0208 kigou conversion table */
681 /* 0x8140 - 0x819e */
682 static const unsigned char fv[] = {
683
684     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
685     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
686     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
687     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
688     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
689     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
690     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
691     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
692     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
696 } ;
697
698
699
700 static int             file_out_f = FALSE;
701 #ifdef OVERWRITE
702 static int             overwrite_f = FALSE;
703 static int             preserve_time_f = FALSE;
704 static int             backup_f = FALSE;
705 static char            *backup_suffix = "";
706 static char *get_backup_filename(const char *suffix, const char *filename);
707 #endif
708
709 static int             nlmode_f = 0;   /* CR, LF, CRLF */
710 static nkf_char prev_cr = 0;
711 #ifdef EASYWIN /*Easy Win */
712 static int             end_check;
713 #endif /*Easy Win */
714
715 #define STD_GC_BUFSIZE (256)
716 nkf_char std_gc_buf[STD_GC_BUFSIZE];
717 nkf_char std_gc_ndx;
718
719 #ifdef WIN32DLL
720 #include "nkf32dll.c"
721 #elif defined(PERL_XS)
722 #else /* WIN32DLL */
723 int main(int argc, char **argv)
724 {
725     FILE  *fin;
726     unsigned char  *cp;
727
728     char *outfname = NULL;
729     char *origfname;
730
731 #ifdef EASYWIN /*Easy Win */
732     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
733 #endif
734
735     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
736         cp = (unsigned char *)*argv;
737         options(cp);
738 #ifdef EXEC_IO
739         if (exec_f){
740             int fds[2], pid;
741             if (pipe(fds) < 0 || (pid = fork()) < 0){
742                 abort();
743             }
744             if (pid == 0){
745                 if (exec_f > 0){
746                     close(fds[0]);
747                     dup2(fds[1], 1);
748                 }else{
749                     close(fds[1]);
750                     dup2(fds[0], 0);
751                 }
752                 execvp(argv[1], &argv[1]);
753             }
754             if (exec_f > 0){
755                 close(fds[1]);
756                 dup2(fds[0], 0);
757             }else{
758                 close(fds[0]);
759                 dup2(fds[1], 1);
760             }
761             argc = 0;
762             break;
763         }
764 #endif
765     }
766     if(x0201_f == WISH_TRUE)
767          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
768
769     if (binmode_f == TRUE)
770 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
771     if (freopen("","wb",stdout) == NULL)
772         return (-1);
773 #else
774     setbinmode(stdout);
775 #endif
776
777     if (unbuf_f)
778       setbuf(stdout, (char *) NULL);
779     else
780       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
781
782     if (argc == 0) {
783       if (binmode_f == TRUE)
784 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
785       if (freopen("","rb",stdin) == NULL) return (-1);
786 #else
787       setbinmode(stdin);
788 #endif
789       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
790       if (nop_f)
791           noconvert(stdin);
792       else {
793           kanji_convert(stdin);
794           if (guess_f) print_guessed_code(NULL);
795       }
796     } else {
797       int nfiles = argc;
798         int is_argument_error = FALSE;
799       while (argc--) {
800             is_inputcode_mixed = FALSE;
801             input_codename = NULL;
802 #ifdef CHECK_OPTION
803             iconv_for_check = 0;
804 #endif
805           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
806               perror(*--argv);
807                 *argv++;
808                 is_argument_error = TRUE;
809                 continue;
810           } else {
811 #ifdef OVERWRITE
812               int fd = 0;
813               int fd_backup = 0;
814 #endif
815
816 /* reopen file for stdout */
817               if (file_out_f == TRUE) {
818 #ifdef OVERWRITE
819                   if (overwrite_f){
820                       outfname = malloc(strlen(origfname)
821                                         + strlen(".nkftmpXXXXXX")
822                                         + 1);
823                       if (!outfname){
824                           perror(origfname);
825                           return -1;
826                       }
827                       strcpy(outfname, origfname);
828 #ifdef MSDOS
829                       {
830                           int i;
831                           for (i = strlen(outfname); i; --i){
832                               if (outfname[i - 1] == '/'
833                                   || outfname[i - 1] == '\\'){
834                                   break;
835                               }
836                           }
837                           outfname[i] = '\0';
838                       }
839                       strcat(outfname, "ntXXXXXX");
840                       mktemp(outfname);
841                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
842                                 S_IREAD | S_IWRITE);
843 #else
844                       strcat(outfname, ".nkftmpXXXXXX");
845                       fd = mkstemp(outfname);
846 #endif
847                       if (fd < 0
848                           || (fd_backup = dup(fileno(stdout))) < 0
849                           || dup2(fd, fileno(stdout)) < 0
850                           ){
851                           perror(origfname);
852                           return -1;
853                       }
854                   }else
855 #endif
856                   if(argc == 1) {
857                       outfname = *argv++;
858                       argc--;
859                   } else {
860                       outfname = "nkf.out";
861                   }
862
863                   if(freopen(outfname, "w", stdout) == NULL) {
864                       perror (outfname);
865                       return (-1);
866                   }
867                   if (binmode_f == TRUE) {
868 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
869                       if (freopen("","wb",stdout) == NULL)
870                            return (-1);
871 #else
872                       setbinmode(stdout);
873 #endif
874                   }
875               }
876               if (binmode_f == TRUE)
877 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
878                  if (freopen("","rb",fin) == NULL)
879                     return (-1);
880 #else
881                  setbinmode(fin);
882 #endif
883               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
884               if (nop_f)
885                   noconvert(fin);
886               else {
887                   char *filename = NULL;
888                   kanji_convert(fin);
889                   if (nfiles > 1) filename = origfname;
890                   if (guess_f) print_guessed_code(filename);
891               }
892               fclose(fin);
893 #ifdef OVERWRITE
894               if (overwrite_f) {
895                   struct stat     sb;
896 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
897                   time_t tb[2];
898 #else
899                   struct utimbuf  tb;
900 #endif
901
902                   fflush(stdout);
903                   close(fd);
904                   if (dup2(fd_backup, fileno(stdout)) < 0){
905                       perror("dup2");
906                   }
907                   if (stat(origfname, &sb)) {
908                       fprintf(stderr, "Can't stat %s\n", origfname);
909                   }
910                   /* Restore the permissions. */
911                   if (chmod(outfname, sb.st_mode)) {
912                       fprintf(stderr, "Can't set permission %s\n", outfname);
913                   }
914
915                   /* Restore the timestamp. */
916                     if(preserve_time_f){
917 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
918                         tb[0] = tb[1] = sb.st_mtime;
919                         if (utime(outfname, tb)) {
920                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
921                         }
922 #else
923                         tb.actime  = sb.st_atime;
924                         tb.modtime = sb.st_mtime;
925                         if (utime(outfname, &tb)) {
926                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
927                         }
928 #endif
929                     }
930                     if(backup_f){
931                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
932 #ifdef MSDOS
933                         unlink(backup_filename);
934 #endif
935                         if (rename(origfname, backup_filename)) {
936                             perror(backup_filename);
937                             fprintf(stderr, "Can't rename %s to %s\n",
938                                     origfname, backup_filename);
939                         }
940                     }else{
941 #ifdef MSDOS
942                         if (unlink(origfname)){
943                             perror(origfname);
944                         }
945 #endif
946                     }
947                   if (rename(outfname, origfname)) {
948                       perror(origfname);
949                       fprintf(stderr, "Can't rename %s to %s\n",
950                               outfname, origfname);
951                   }
952                   free(outfname);
953               }
954 #endif
955           }
956       }
957         if (is_argument_error)
958             return(-1);
959     }
960 #ifdef EASYWIN /*Easy Win */
961     if (file_out_f == FALSE)
962         scanf("%d",&end_check);
963     else
964         fclose(stdout);
965 #else /* for Other OS */
966     if (file_out_f == TRUE)
967         fclose(stdout);
968 #endif /*Easy Win */
969     return (0);
970 }
971 #endif /* WIN32DLL */
972
973 #ifdef OVERWRITE
/*
 * Build the backup file name for FILENAME from the backup SUFFIX.
 *
 * If SUFFIX contains one or more '*' characters, every '*' is replaced by
 * FILENAME and the other characters of SUFFIX are copied verbatim
 * (e.g. suffix "*.orig", filename "a.c" -> "a.c.orig").
 * Otherwise SUFFIX is simply appended to FILENAME
 * (e.g. suffix ".bak", filename "a.c" -> "a.c.bak").
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL (after reporting through perror()) when the allocation
 * fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    const size_t suffix_length = strlen(suffix);
    const size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t total_length;
    size_t i;
    char *backup_filename;
    char *out;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* each '*' disappears and is replaced by one copy of the file name */
        total_length = (suffix_length - asterisk_count)
                       + asterisk_count * filename_length;
    } else {
        total_length = filename_length + suffix_length;
    }

    /* +1 for the terminating NUL; both branches share one checked allocation */
    backup_filename = malloc(total_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    out = backup_filename;
    if (asterisk_count) {
        for (i = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(out, filename, filename_length);
                out += filename_length;
            } else {
                *out++ = suffix[i];
            }
        }
    } else {
        memcpy(out, filename, filename_length);
        out += filename_length;
        memcpy(out, suffix, suffix_length);
        out += suffix_length;
    }
    *out = '\0';

    return backup_filename;
}
1012 #endif
1013
/*
 * Table of long ("--name") options consulted by options().
 *
 * The table is scanned from the first entry to the last and the first
 * matching entry wins, so the order of the entries is significant.
 * A name ending in '=' takes a value, which follows the '=' on the
 * command line.  A non-empty alias is parsed by options() as a string of
 * short options; an empty alias means options() handles the entry by
 * comparing its name.
 */
static const struct {
    const char *name;       /* long option name, without the leading "--" */
    const char *alias;      /* equivalent short options, or "" */
} long_option[] = {
    /* explicit input / output code set */
    { "ic=",                "" },
    { "oc=",                "" },

    /* aliases for short options */
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess",              "g" },

    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/*
 * Set to 1 by options() when it meets a bare "--"; from then on
 * options() returns immediately without parsing its argument.
 */
static int option_mode = 0;
1094
1095 void options(unsigned char *cp)
1096 {
1097     nkf_char i, j;
1098     unsigned char *p;
1099     unsigned char *cp_back = NULL;
1100     char codeset[32];
1101
1102     if (option_mode==1)
1103         return;
1104     while(*cp && *cp++!='-');
1105     while (*cp || cp_back) {
1106         if(!*cp){
1107             cp = cp_back;
1108             cp_back = NULL;
1109             continue;
1110         }
1111         p = 0;
1112         switch (*cp++) {
1113         case '-':  /* literal options */
1114             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1115                 option_mode = 1;
1116                 return;
1117             }
1118             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1119                 p = (unsigned char *)long_option[i].name;
1120                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1121                 if (*p == cp[j] || cp[j] == SP){
1122                     p = &cp[j] + 1;
1123                     break;
1124                 }
1125                 p = 0;
1126             }
1127             if (p == 0) return;
1128             while(*cp && *cp != SP && cp++);
1129             if (long_option[i].alias[0]){
1130                 cp_back = cp;
1131                 cp = (unsigned char *)long_option[i].alias;
1132             }else{
1133                 if (strcmp(long_option[i].name, "ic=") == 0){
1134                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1135                         codeset[i] = nkf_toupper(p[i]);
1136                     }
1137                     codeset[i] = 0;
1138                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1139                         input_f = JIS_INPUT;
1140                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1141                       strcmp(codeset, "CP50220") == 0 ||
1142                       strcmp(codeset, "CP50221") == 0 ||
1143                       strcmp(codeset, "CP50222") == 0){
1144                         input_f = JIS_INPUT;
1145 #ifdef SHIFTJIS_CP932
1146                         cp51932_f = TRUE;
1147 #endif
1148 #ifdef UTF8_OUTPUT_ENABLE
1149                         ms_ucs_map_f = UCS_MAP_CP932;
1150 #endif
1151                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1152                         input_f = JIS_INPUT;
1153 #ifdef X0212_ENABLE
1154                         x0212_f = TRUE;
1155 #endif
1156                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1157                         input_f = JIS_INPUT;
1158 #ifdef X0212_ENABLE
1159                         x0212_f = TRUE;
1160 #endif
1161                         x0213_f = TRUE;
1162                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1163                         input_f = SJIS_INPUT;
1164                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1165                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1166                              strcmp(codeset, "CP932") == 0 ||
1167                              strcmp(codeset, "MS932") == 0){
1168                         input_f = SJIS_INPUT;
1169 #ifdef SHIFTJIS_CP932
1170                         cp51932_f = TRUE;
1171 #endif
1172 #ifdef UTF8_OUTPUT_ENABLE
1173                         ms_ucs_map_f = UCS_MAP_CP932;
1174 #endif
1175                     }else if(strcmp(codeset, "CP10001") == 0){
1176                         input_f = SJIS_INPUT;
1177 #ifdef SHIFTJIS_CP932
1178                         cp51932_f = TRUE;
1179 #endif
1180 #ifdef UTF8_OUTPUT_ENABLE
1181                         ms_ucs_map_f = UCS_MAP_CP10001;
1182 #endif
1183                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1184                              strcmp(codeset, "EUC-JP") == 0){
1185                         input_f = EUC_INPUT;
1186                     }else if(strcmp(codeset, "CP51932") == 0){
1187                         input_f = EUC_INPUT;
1188 #ifdef SHIFTJIS_CP932
1189                         cp51932_f = TRUE;
1190 #endif
1191 #ifdef UTF8_OUTPUT_ENABLE
1192                         ms_ucs_map_f = UCS_MAP_CP932;
1193 #endif
1194                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1195                              strcmp(codeset, "EUCJP-MS") == 0 ||
1196                              strcmp(codeset, "EUCJPMS") == 0){
1197                         input_f = EUC_INPUT;
1198 #ifdef SHIFTJIS_CP932
1199                         cp51932_f = FALSE;
1200 #endif
1201 #ifdef UTF8_OUTPUT_ENABLE
1202                         ms_ucs_map_f = UCS_MAP_MS;
1203 #endif
1204                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1205                              strcmp(codeset, "EUCJP-ASCII") == 0){
1206                         input_f = EUC_INPUT;
1207 #ifdef SHIFTJIS_CP932
1208                         cp51932_f = FALSE;
1209 #endif
1210 #ifdef UTF8_OUTPUT_ENABLE
1211                         ms_ucs_map_f = UCS_MAP_ASCII;
1212 #endif
1213                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1214                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1215                         input_f = SJIS_INPUT;
1216                         x0213_f = TRUE;
1217 #ifdef SHIFTJIS_CP932
1218                         cp51932_f = FALSE;
1219 #endif
1220                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1221                              strcmp(codeset, "EUC-JIS-2004") == 0){
1222                         input_f = EUC_INPUT;
1223                         x0213_f = TRUE;
1224 #ifdef SHIFTJIS_CP932
1225                         cp51932_f = FALSE;
1226 #endif
1227 #ifdef UTF8_INPUT_ENABLE
1228                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1229                              strcmp(codeset, "UTF-8N") == 0 ||
1230                              strcmp(codeset, "UTF-8-BOM") == 0){
1231                         input_f = UTF8_INPUT;
1232 #ifdef UNICODE_NORMALIZATION
1233                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1234                              strcmp(codeset, "UTF-8-MAC") == 0){
1235                         input_f = UTF8_INPUT;
1236                         nfc_f = TRUE;
1237 #endif
1238                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1239                              strcmp(codeset, "UTF-16BE") == 0 ||
1240                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1241                         input_f = UTF16_INPUT;
1242                         input_endian = ENDIAN_BIG;
1243                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1244                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1245                         input_f = UTF16_INPUT;
1246                         input_endian = ENDIAN_LITTLE;
1247                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1248                              strcmp(codeset, "UTF-32BE") == 0 ||
1249                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1250                         input_f = UTF32_INPUT;
1251                         input_endian = ENDIAN_BIG;
1252                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1253                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1254                         input_f = UTF32_INPUT;
1255                         input_endian = ENDIAN_LITTLE;
1256 #endif
1257                     }
1258                     continue;
1259                 }
1260                 if (strcmp(long_option[i].name, "oc=") == 0){
1261                     x0201_f = FALSE;
1262                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1263                         codeset[i] = nkf_toupper(p[i]);
1264                     }
1265                     codeset[i] = 0;
1266                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1267                         output_conv = j_oconv;
1268                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1269                         output_conv = j_oconv;
1270                         no_cp932ext_f = TRUE;
1271 #ifdef SHIFTJIS_CP932
1272                         cp932inv_f = FALSE;
1273 #endif
1274 #ifdef UTF8_OUTPUT_ENABLE
1275                         ms_ucs_map_f = UCS_MAP_CP932;
1276 #endif
1277                     }else if(strcmp(codeset, "CP50220") == 0){
1278                         output_conv = j_oconv;
1279                         x0201_f = TRUE;
1280 #ifdef SHIFTJIS_CP932
1281                         cp932inv_f = FALSE;
1282 #endif
1283 #ifdef UTF8_OUTPUT_ENABLE
1284                         ms_ucs_map_f = UCS_MAP_CP932;
1285 #endif
1286                     }else if(strcmp(codeset, "CP50221") == 0){
1287                         output_conv = j_oconv;
1288 #ifdef SHIFTJIS_CP932
1289                         cp932inv_f = FALSE;
1290 #endif
1291 #ifdef UTF8_OUTPUT_ENABLE
1292                         ms_ucs_map_f = UCS_MAP_CP932;
1293 #endif
1294                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1295                         output_conv = j_oconv;
1296 #ifdef X0212_ENABLE
1297                         x0212_f = TRUE;
1298 #endif
1299 #ifdef SHIFTJIS_CP932
1300                         cp932inv_f = FALSE;
1301 #endif
1302                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1303                         output_conv = j_oconv;
1304 #ifdef X0212_ENABLE
1305                         x0212_f = TRUE;
1306 #endif
1307                         x0213_f = TRUE;
1308 #ifdef SHIFTJIS_CP932
1309                         cp932inv_f = FALSE;
1310 #endif
1311                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1312                         output_conv = s_oconv;
1313                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1314                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1315                              strcmp(codeset, "CP932") == 0 ||
1316                              strcmp(codeset, "MS932") == 0){
1317                         output_conv = s_oconv;
1318 #ifdef UTF8_OUTPUT_ENABLE
1319                         ms_ucs_map_f = UCS_MAP_CP932;
1320 #endif
1321                     }else if(strcmp(codeset, "CP10001") == 0){
1322                         output_conv = s_oconv;
1323 #ifdef UTF8_OUTPUT_ENABLE
1324                         ms_ucs_map_f = UCS_MAP_CP10001;
1325 #endif
1326                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1327                              strcmp(codeset, "EUC-JP") == 0){
1328                         output_conv = e_oconv;
1329                     }else if(strcmp(codeset, "CP51932") == 0){
1330                         output_conv = e_oconv;
1331 #ifdef SHIFTJIS_CP932
1332                         cp932inv_f = FALSE;
1333 #endif
1334 #ifdef UTF8_OUTPUT_ENABLE
1335                         ms_ucs_map_f = UCS_MAP_CP932;
1336 #endif
1337                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1338                              strcmp(codeset, "EUCJP-MS") == 0 ||
1339                              strcmp(codeset, "EUCJPMS") == 0){
1340                         output_conv = e_oconv;
1341 #ifdef X0212_ENABLE
1342                         x0212_f = TRUE;
1343 #endif
1344 #ifdef UTF8_OUTPUT_ENABLE
1345                         ms_ucs_map_f = UCS_MAP_MS;
1346 #endif
1347                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1348                              strcmp(codeset, "EUCJP-ASCII") == 0){
1349                         output_conv = e_oconv;
1350 #ifdef X0212_ENABLE
1351                         x0212_f = TRUE;
1352 #endif
1353 #ifdef UTF8_OUTPUT_ENABLE
1354                         ms_ucs_map_f = UCS_MAP_ASCII;
1355 #endif
1356                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1357                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1358                         output_conv = s_oconv;
1359                         x0213_f = TRUE;
1360 #ifdef SHIFTJIS_CP932
1361                         cp932inv_f = FALSE;
1362 #endif
1363                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1364                              strcmp(codeset, "EUC-JIS-2004") == 0){
1365                         output_conv = e_oconv;
1366 #ifdef X0212_ENABLE
1367                         x0212_f = TRUE;
1368 #endif
1369                         x0213_f = TRUE;
1370 #ifdef SHIFTJIS_CP932
1371                         cp932inv_f = FALSE;
1372 #endif
1373 #ifdef UTF8_OUTPUT_ENABLE
1374                     }else if(strcmp(codeset, "UTF-8") == 0){
1375                         output_conv = w_oconv;
1376                     }else if(strcmp(codeset, "UTF-8N") == 0){
1377                         output_conv = w_oconv;
1378                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1379                         output_conv = w_oconv;
1380                         output_bom_f = TRUE;
1381                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1382                         output_conv = w_oconv16;
1383                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1384                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1385                         output_conv = w_oconv16;
1386                         output_bom_f = TRUE;
1387                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1388                         output_conv = w_oconv16;
1389                         output_endian = ENDIAN_LITTLE;
1390                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1391                         output_conv = w_oconv16;
1392                         output_endian = ENDIAN_LITTLE;
1393                         output_bom_f = TRUE;
1394                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1395                              strcmp(codeset, "UTF-32BE") == 0){
1396                         output_conv = w_oconv32;
1397                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1398                         output_conv = w_oconv32;
1399                         output_bom_f = TRUE;
1400                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1401                         output_conv = w_oconv32;
1402                         output_endian = ENDIAN_LITTLE;
1403                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1404                         output_conv = w_oconv32;
1405                         output_endian = ENDIAN_LITTLE;
1406                         output_bom_f = TRUE;
1407 #endif
1408                     }
1409                     continue;
1410                 }
1411 #ifdef OVERWRITE
1412                 if (strcmp(long_option[i].name, "overwrite") == 0){
1413                     file_out_f = TRUE;
1414                     overwrite_f = TRUE;
1415                     preserve_time_f = TRUE;
1416                     continue;
1417                 }
1418                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1419                     file_out_f = TRUE;
1420                     overwrite_f = TRUE;
1421                     preserve_time_f = TRUE;
1422                     backup_f = TRUE;
1423                     backup_suffix = malloc(strlen((char *) p) + 1);
1424                     strcpy(backup_suffix, (char *) p);
1425                     continue;
1426                 }
1427                 if (strcmp(long_option[i].name, "in-place") == 0){
1428                     file_out_f = TRUE;
1429                     overwrite_f = TRUE;
1430                     preserve_time_f = FALSE;
1431                     continue;
1432                 }
1433                 if (strcmp(long_option[i].name, "in-place=") == 0){
1434                     file_out_f = TRUE;
1435                     overwrite_f = TRUE;
1436                     preserve_time_f = FALSE;
1437                     backup_f = TRUE;
1438                     backup_suffix = malloc(strlen((char *) p) + 1);
1439                     strcpy(backup_suffix, (char *) p);
1440                     continue;
1441                 }
1442 #endif
1443 #ifdef INPUT_OPTION
1444                 if (strcmp(long_option[i].name, "cap-input") == 0){
1445                     cap_f = TRUE;
1446                     continue;
1447                 }
1448                 if (strcmp(long_option[i].name, "url-input") == 0){
1449                     url_f = TRUE;
1450                     continue;
1451                 }
1452 #endif
1453 #ifdef NUMCHAR_OPTION
1454                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1455                     numchar_f = TRUE;
1456                     continue;
1457                 }
1458 #endif
1459 #ifdef CHECK_OPTION
1460                 if (strcmp(long_option[i].name, "no-output") == 0){
1461                     noout_f = TRUE;
1462                     continue;
1463                 }
1464                 if (strcmp(long_option[i].name, "debug") == 0){
1465                     debug_f = TRUE;
1466                     continue;
1467                 }
1468 #endif
1469                 if (strcmp(long_option[i].name, "cp932") == 0){
1470 #ifdef SHIFTJIS_CP932
1471                     cp51932_f = TRUE;
1472                     cp932inv_f = TRUE;
1473 #endif
1474 #ifdef UTF8_OUTPUT_ENABLE
1475                     ms_ucs_map_f = UCS_MAP_CP932;
1476 #endif
1477                     continue;
1478                 }
1479                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1480 #ifdef SHIFTJIS_CP932
1481                     cp51932_f = FALSE;
1482                     cp932inv_f = FALSE;
1483 #endif
1484 #ifdef UTF8_OUTPUT_ENABLE
1485                     ms_ucs_map_f = UCS_MAP_ASCII;
1486 #endif
1487                     continue;
1488                 }
1489 #ifdef SHIFTJIS_CP932
1490                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1491                     cp932inv_f = TRUE;
1492                     continue;
1493                 }
1494 #endif
1495
1496 #ifdef X0212_ENABLE
1497                 if (strcmp(long_option[i].name, "x0212") == 0){
1498                     x0212_f = TRUE;
1499                     continue;
1500                 }
1501 #endif
1502
1503 #ifdef EXEC_IO
1504                   if (strcmp(long_option[i].name, "exec-in") == 0){
1505                       exec_f = 1;
1506                       return;
1507                   }
1508                   if (strcmp(long_option[i].name, "exec-out") == 0){
1509                       exec_f = -1;
1510                       return;
1511                   }
1512 #endif
1513 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1514                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1515                     no_cp932ext_f = TRUE;
1516                     continue;
1517                 }
1518                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1519                     no_best_fit_chars_f = TRUE;
1520                     continue;
1521                 }
1522                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1523                     encode_fallback = NULL;
1524                     continue;
1525                 }
1526                 if (strcmp(long_option[i].name, "fb-html") == 0){
1527                     encode_fallback = encode_fallback_html;
1528                     continue;
1529                 }
1530                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1531                     encode_fallback = encode_fallback_xml;
1532                     continue;
1533                 }
1534                 if (strcmp(long_option[i].name, "fb-java") == 0){
1535                     encode_fallback = encode_fallback_java;
1536                     continue;
1537                 }
1538                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1539                     encode_fallback = encode_fallback_perl;
1540                     continue;
1541                 }
1542                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1543                     encode_fallback = encode_fallback_subchar;
1544                     continue;
1545                 }
1546                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1547                     encode_fallback = encode_fallback_subchar;
1548                     unicode_subchar = 0;
1549                     if (p[0] != '0'){
1550                         /* decimal number */
1551                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1552                             unicode_subchar *= 10;
1553                             unicode_subchar += hex2bin(p[i]);
1554                         }
1555                     }else if(p[1] == 'x' || p[1] == 'X'){
1556                         /* hexadecimal number */
1557                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1558                             unicode_subchar <<= 4;
1559                             unicode_subchar |= hex2bin(p[i]);
1560                         }
1561                     }else{
1562                         /* octal number */
1563                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1564                             unicode_subchar *= 8;
1565                             unicode_subchar += hex2bin(p[i]);
1566                         }
1567                     }
1568                     w16e_conv(unicode_subchar, &i, &j);
1569                     unicode_subchar = i<<8 | j;
1570                     continue;
1571                 }
1572 #endif
1573 #ifdef UTF8_OUTPUT_ENABLE
1574                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1575                     ms_ucs_map_f = UCS_MAP_MS;
1576                     continue;
1577                 }
1578 #endif
1579 #ifdef UNICODE_NORMALIZATION
1580                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1581                     input_f = UTF8_INPUT;
1582                     nfc_f = TRUE;
1583                     continue;
1584                 }
1585 #endif
1586                 if (strcmp(long_option[i].name, "prefix=") == 0){
1587                     if (nkf_isgraph(p[0])){
1588                         for (i = 1; nkf_isgraph(p[i]); i++){
1589                             prefix_table[p[i]] = p[0];
1590                         }
1591                     }
1592                     continue;
1593                 }
1594             }
1595             continue;
1596         case 'b':           /* buffered mode */
1597             unbuf_f = FALSE;
1598             continue;
1599         case 'u':           /* non bufferd mode */
1600             unbuf_f = TRUE;
1601             continue;
1602         case 't':           /* transparent mode */
1603             if (*cp=='1') {
1604                 /* alias of -t */
1605                 nop_f = TRUE;
1606                 *cp++;
1607             } else if (*cp=='2') {
1608                 /*
1609                  * -t with put/get
1610                  *
1611                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1612                  *
1613                  */
1614                 nop_f = 2;
1615                 *cp++;
1616             } else
1617                 nop_f = TRUE;
1618             continue;
1619         case 'j':           /* JIS output */
1620         case 'n':
1621             output_conv = j_oconv;
1622             continue;
1623         case 'e':           /* AT&T EUC output */
1624             output_conv = e_oconv;
1625             cp932inv_f = FALSE;
1626             continue;
1627         case 's':           /* SJIS output */
1628             output_conv = s_oconv;
1629             continue;
1630         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1631             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1632             input_f = LATIN1_INPUT;
1633             continue;
1634         case 'i':           /* Kanji IN ESC-$-@/B */
1635             if (*cp=='@'||*cp=='B')
1636                 kanji_intro = *cp++;
1637             continue;
1638         case 'o':           /* ASCII IN ESC-(-J/B */
1639             if (*cp=='J'||*cp=='B'||*cp=='H')
1640                 ascii_intro = *cp++;
1641             continue;
1642         case 'h':
1643             /*
1644                 bit:1   katakana->hiragana
1645                 bit:2   hiragana->katakana
1646             */
1647             if ('9'>= *cp && *cp>='0')
1648                 hira_f |= (*cp++ -'0');
1649             else
1650                 hira_f |= 1;
1651             continue;
1652         case 'r':
1653             rot_f = TRUE;
1654             continue;
1655 #if defined(MSDOS) || defined(__OS2__)
1656         case 'T':
1657             binmode_f = FALSE;
1658             continue;
1659 #endif
1660 #ifndef PERL_XS
1661         case 'V':
1662             version();
1663             exit(1);
1664             break;
1665         case 'v':
1666             usage();
1667             exit(1);
1668             break;
1669 #endif
1670 #ifdef UTF8_OUTPUT_ENABLE
1671         case 'w':           /* UTF-8 output */
1672             if (cp[0] == '8') {
1673                 output_conv = w_oconv; cp++;
1674                 if (cp[0] == '0'){
1675                     cp++;
1676                 } else {
1677                     output_bom_f = TRUE;
1678                 }
1679             } else {
1680                 if ('1'== cp[0] && '6'==cp[1]) {
1681                     output_conv = w_oconv16; cp+=2;
1682                 } else if ('3'== cp[0] && '2'==cp[1]) {
1683                     output_conv = w_oconv32; cp+=2;
1684                 } else {
1685                     output_conv = w_oconv;
1686                     continue;
1687                 }
1688                 if (cp[0]=='L') {
1689                     cp++;
1690                     output_endian = ENDIAN_LITTLE;
1691                 } else if (cp[0] == 'B') {
1692                     cp++;
1693                 } else {
1694                     continue;
1695                 }
1696                 if (cp[0] == '0'){
1697                     cp++;
1698                 } else {
1699                     output_bom_f = TRUE;
1700                 }
1701             }
1702             continue;
1703 #endif
1704 #ifdef UTF8_INPUT_ENABLE
1705         case 'W':           /* UTF input */
1706             if (cp[0] == '8') {
1707                 cp++;
1708                 input_f = UTF8_INPUT;
1709             }else{
1710                 if ('1'== cp[0] && '6'==cp[1]) {
1711                     cp += 2;
1712                     input_f = UTF16_INPUT;
1713                     input_endian = ENDIAN_BIG;
1714                 } else if ('3'== cp[0] && '2'==cp[1]) {
1715                     cp += 2;
1716                     input_f = UTF32_INPUT;
1717                     input_endian = ENDIAN_BIG;
1718                 } else {
1719                     input_f = UTF8_INPUT;
1720                     continue;
1721                 }
1722                 if (cp[0]=='L') {
1723                     cp++;
1724                     input_endian = ENDIAN_LITTLE;
1725                 } else if (cp[0] == 'B') {
1726                     cp++;
1727                 }
1728             }
1729             continue;
1730 #endif
1731         /* Input code assumption */
1732         case 'J':   /* JIS input */
1733             input_f = JIS_INPUT;
1734             continue;
1735         case 'E':   /* AT&T EUC input */
1736             input_f = EUC_INPUT;
1737             continue;
1738         case 'S':   /* MS Kanji input */
1739             input_f = SJIS_INPUT;
1740             if (x0201_f==NO_X0201) x0201_f=TRUE;
1741             continue;
1742         case 'Z':   /* Convert X0208 alphabet to asii */
1743             /* alpha_f
1744                bit:0   Convert JIS X 0208 Alphabet to ASCII
1745                bit:1   Convert Kankaku to one space
1746                bit:2   Convert Kankaku to two spaces
1747                bit:3   Convert HTML Entity
1748                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1749             */
1750             while ('0'<= *cp && *cp <='9') {
1751                 alpha_f |= 1 << (*cp++ - '0');
1752             }
1753             if (!alpha_f) alpha_f = 1;
1754             continue;
1755         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1756             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1757             /* accept  X0201
1758                     ESC-(-I     in JIS, EUC, MS Kanji
1759                     SI/SO       in JIS, EUC, MS Kanji
1760                     SSO         in EUC, JIS, not in MS Kanji
1761                     MS Kanji (0xa0-0xdf)
1762                output  X0201
1763                     ESC-(-I     in JIS (0x20-0x5f)
1764                     SSO         in EUC (0xa0-0xdf)
1765                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1766             */
1767             continue;
1768         case 'X':   /* Assume X0201 kana */
1769             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1770             x0201_f = TRUE;
1771             continue;
1772         case 'F':   /* prserve new lines */
1773             fold_preserve_f = TRUE;
1774         case 'f':   /* folding -f60 or -f */
1775             fold_f = TRUE;
1776             fold_len = 0;
1777             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1778                 fold_len *= 10;
1779                 fold_len += *cp++ - '0';
1780             }
1781             if (!(0<fold_len && fold_len<BUFSIZ))
1782                 fold_len = DEFAULT_FOLD;
1783             if (*cp=='-') {
1784                 fold_margin = 0;
1785                 cp++;
1786                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1787                     fold_margin *= 10;
1788                     fold_margin += *cp++ - '0';
1789                 }
1790             }
1791             continue;
1792         case 'm':   /* MIME support */
1793             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1794             if (*cp=='B'||*cp=='Q') {
1795                 mime_decode_mode = *cp++;
1796                 mimebuf_f = FIXED_MIME;
1797             } else if (*cp=='N') {
1798                 mime_f = TRUE; cp++;
1799             } else if (*cp=='S') {
1800                 mime_f = STRICT_MIME; cp++;
1801             } else if (*cp=='0') {
1802                 mime_decode_f = FALSE;
1803                 mime_f = FALSE; cp++;
1804             }
1805             continue;
1806         case 'M':   /* MIME output */
1807             if (*cp=='B') {
1808                 mimeout_mode = 'B';
1809                 mimeout_f = FIXED_MIME; cp++;
1810             } else if (*cp=='Q') {
1811                 mimeout_mode = 'Q';
1812                 mimeout_f = FIXED_MIME; cp++;
1813             } else {
1814                 mimeout_f = TRUE;
1815             }
1816             continue;
1817         case 'B':   /* Broken JIS support */
1818             /*  bit:0   no ESC JIS
1819                 bit:1   allow any x on ESC-(-x or ESC-$-x
1820                 bit:2   reset to ascii on NL
1821             */
1822             if ('9'>= *cp && *cp>='0')
1823                 broken_f |= 1<<(*cp++ -'0');
1824             else
1825                 broken_f |= TRUE;
1826             continue;
1827 #ifndef PERL_XS
1828         case 'O':/* for Output file */
1829             file_out_f = TRUE;
1830             continue;
1831 #endif
1832         case 'c':/* add cr code */
1833             nlmode_f = CRLF;
1834             continue;
1835         case 'd':/* delete cr code */
1836             nlmode_f = LF;
1837             continue;
1838         case 'I':   /* ISO-2022-JP output */
1839             iso2022jp_f = TRUE;
1840             continue;
1841         case 'L':  /* line mode */
1842             if (*cp=='u') {         /* unix */
1843                 nlmode_f = LF; cp++;
1844             } else if (*cp=='m') { /* mac */
1845                 nlmode_f = CR; cp++;
1846             } else if (*cp=='w') { /* windows */
1847                 nlmode_f = CRLF; cp++;
1848             } else if (*cp=='0') { /* no conversion  */
1849                 nlmode_f = 0; cp++;
1850             }
1851             continue;
1852         case 'g':
1853 #ifndef PERL_XS
1854             guess_f = TRUE;
1855 #endif
1856             continue;
1857         case SP:
1858         /* module muliple options in a string are allowed for Perl moudle  */
1859             while(*cp && *cp++!='-');
1860             continue;
1861         default:
1862             /* bogus option but ignored */
1863             continue;
1864         }
1865     }
1866 }
1867
1868 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1869 {
1870     if (iconv_func){
1871         struct input_code *p = input_code_list;
1872         while (p->name){
1873             if (iconv_func == p->iconv_func){
1874                 return p;
1875             }
1876             p++;
1877         }
1878     }
1879     return 0;
1880 }
1881
1882 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1883 {
1884 #ifdef INPUT_CODE_FIX
1885     if (f || !input_f)
1886 #endif
1887         if (estab_f != f){
1888             estab_f = f;
1889         }
1890
1891     if (iconv_func
1892 #ifdef INPUT_CODE_FIX
1893         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1894 #endif
1895         ){
1896         iconv = iconv_func;
1897     }
1898 #ifdef CHECK_OPTION
1899     if (estab_f && iconv_for_check != iconv){
1900         struct input_code *p = find_inputcode_byfunc(iconv);
1901         if (p){
1902             set_input_codename(p->name);
1903             debug(p->name);
1904         }
1905         iconv_for_check = iconv;
1906     }
1907 #endif
1908 }
1909
/* Penalty bits accumulated in input_code.score while guessing the input encoding. */
1910 #define SCORE_L2       (1)                   /* JIS level-2 kanji */
1911 #define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
1912 #define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
1913 #ifdef SHIFTJIS_CP932
1914 #define SCORE_CP932    (SCORE_DEPEND << 1)   /* replaced via CP932 reading */
1915 #define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* non-existent character */
1916 #else
1917 #define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* non-existent character */
1918 #endif
1919 #define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
1920 #define SCORE_ERROR    (SCORE_iMIME << 1) /* error */
1921
/* Score assigned by status_reset(). */
1922 #define SCORE_INIT (SCORE_iMIME)
1923
/*
 * Score bits looked up by code_score() with the low nibble of the first
 * byte (c2 & 0x0f) when (c2 & 0x70) == 0x20.
 */
1924 static const char score_table_A0[] = {
1925     0, 0, 0, 0,
1926     0, 0, 0, 0,
1927     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1928     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1929 };
1930
/*
 * Score bits looked up by code_score() with the low nibble of the first
 * byte (c2 & 0x0f) when (c2 & 0x70) == 0x70.
 */
1931 static const char score_table_F0[] = {
1932     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1933     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1934     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1935     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1936 };
1937
1938 void set_code_score(struct input_code *ptr, nkf_char score)
1939 {
1940     if (ptr){
1941         ptr->score |= score;
1942     }
1943 }
1944
1945 void clr_code_score(struct input_code *ptr, nkf_char score)
1946 {
1947     if (ptr){
1948         ptr->score &= ~score;
1949     }
1950 }
1951
1952 void code_score(struct input_code *ptr)
1953 {
1954     nkf_char c2 = ptr->buf[0];
1955 #ifdef UTF8_OUTPUT_ENABLE
1956     nkf_char c1 = ptr->buf[1];
1957 #endif
1958     if (c2 < 0){
1959         set_code_score(ptr, SCORE_ERROR);
1960     }else if (c2 == SSO){
1961         set_code_score(ptr, SCORE_KANA);
1962 #ifdef UTF8_OUTPUT_ENABLE
1963     }else if (!e2w_conv(c2, c1)){
1964         set_code_score(ptr, SCORE_NO_EXIST);
1965 #endif
1966     }else if ((c2 & 0x70) == 0x20){
1967         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1968     }else if ((c2 & 0x70) == 0x70){
1969         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1970     }else if ((c2 & 0x70) >= 0x50){
1971         set_code_score(ptr, SCORE_L2);
1972     }
1973 }
1974
1975 void status_disable(struct input_code *ptr)
1976 {
1977     ptr->stat = -1;
1978     ptr->buf[0] = -1;
1979     code_score(ptr);
1980     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1981 }
1982
1983 void status_push_ch(struct input_code *ptr, nkf_char c)
1984 {
1985     ptr->buf[ptr->index++] = c;
1986 }
1987
1988 void status_clear(struct input_code *ptr)
1989 {
1990     ptr->stat = 0;
1991     ptr->index = 0;
1992 }
1993
1994 void status_reset(struct input_code *ptr)
1995 {
1996     status_clear(ptr);
1997     ptr->score = SCORE_INIT;
1998 }
1999
2000 void status_reinit(struct input_code *ptr)
2001 {
2002     status_reset(ptr);
2003     ptr->_file_stat = 0;
2004 }
2005
2006 void status_check(struct input_code *ptr, nkf_char c)
2007 {
2008     if (c <= DEL && estab_f){
2009         status_reset(ptr);
2010     }
2011 }
2012
2013 void s_status(struct input_code *ptr, nkf_char c)
2014 {
2015     switch(ptr->stat){
2016       case -1:
2017           status_check(ptr, c);
2018           break;
2019       case 0:
2020           if (c <= DEL){
2021               break;
2022 #ifdef NUMCHAR_OPTION
2023           }else if (is_unicode_capsule(c)){
2024               break;
2025 #endif
2026           }else if (0xa1 <= c && c <= 0xdf){
2027               status_push_ch(ptr, SSO);
2028               status_push_ch(ptr, c);
2029               code_score(ptr);
2030               status_clear(ptr);
2031           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2032               ptr->stat = 1;
2033               status_push_ch(ptr, c);
2034 #ifdef SHIFTJIS_CP932
2035           }else if (cp51932_f
2036                     && is_ibmext_in_sjis(c)){
2037               ptr->stat = 2;
2038               status_push_ch(ptr, c);
2039 #endif /* SHIFTJIS_CP932 */
2040 #ifdef X0212_ENABLE
2041           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2042               ptr->stat = 1;
2043               status_push_ch(ptr, c);
2044 #endif /* X0212_ENABLE */
2045           }else{
2046               status_disable(ptr);
2047           }
2048           break;
2049       case 1:
2050           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2051               status_push_ch(ptr, c);
2052               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2053               code_score(ptr);
2054               status_clear(ptr);
2055           }else{
2056               status_disable(ptr);
2057           }
2058           break;
2059       case 2:
2060 #ifdef SHIFTJIS_CP932
2061           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2062               status_push_ch(ptr, c);
2063               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2064                   set_code_score(ptr, SCORE_CP932);
2065                   status_clear(ptr);
2066                   break;
2067               }
2068           }
2069 #endif /* SHIFTJIS_CP932 */
2070 #ifndef X0212_ENABLE
2071           status_disable(ptr);
2072 #endif
2073           break;
2074     }
2075 }
2076
2077 void e_status(struct input_code *ptr, nkf_char c)
2078 {
2079     switch (ptr->stat){
2080       case -1:
2081           status_check(ptr, c);
2082           break;
2083       case 0:
2084           if (c <= DEL){
2085               break;
2086 #ifdef NUMCHAR_OPTION
2087           }else if (is_unicode_capsule(c)){
2088               break;
2089 #endif
2090           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2091               ptr->stat = 1;
2092               status_push_ch(ptr, c);
2093 #ifdef X0212_ENABLE
2094           }else if (0x8f == c){
2095               ptr->stat = 2;
2096               status_push_ch(ptr, c);
2097 #endif /* X0212_ENABLE */
2098           }else{
2099               status_disable(ptr);
2100           }
2101           break;
2102       case 1:
2103           if (0xa1 <= c && c <= 0xfe){
2104               status_push_ch(ptr, c);
2105               code_score(ptr);
2106               status_clear(ptr);
2107           }else{
2108               status_disable(ptr);
2109           }
2110           break;
2111 #ifdef X0212_ENABLE
2112       case 2:
2113           if (0xa1 <= c && c <= 0xfe){
2114               ptr->stat = 1;
2115               status_push_ch(ptr, c);
2116           }else{
2117               status_disable(ptr);
2118           }
2119 #endif /* X0212_ENABLE */
2120     }
2121 }
2122
2123 #ifdef UTF8_INPUT_ENABLE
2124 void w_status(struct input_code *ptr, nkf_char c)
2125 {
2126     switch (ptr->stat){
2127       case -1:
2128           status_check(ptr, c);
2129           break;
2130       case 0:
2131           if (c <= DEL){
2132               break;
2133 #ifdef NUMCHAR_OPTION
2134           }else if (is_unicode_capsule(c)){
2135               break;
2136 #endif
2137           }else if (0xc0 <= c && c <= 0xdf){
2138               ptr->stat = 1;
2139               status_push_ch(ptr, c);
2140           }else if (0xe0 <= c && c <= 0xef){
2141               ptr->stat = 2;
2142               status_push_ch(ptr, c);
2143           }else if (0xf0 <= c && c <= 0xf4){
2144               ptr->stat = 3;
2145               status_push_ch(ptr, c);
2146           }else{
2147               status_disable(ptr);
2148           }
2149           break;
2150       case 1:
2151       case 2:
2152           if (0x80 <= c && c <= 0xbf){
2153               status_push_ch(ptr, c);
2154               if (ptr->index > ptr->stat){
2155                   int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2156                              && ptr->buf[2] == 0xbf);
2157                   w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2158                            &ptr->buf[0], &ptr->buf[1]);
2159                   if (!bom){
2160                       code_score(ptr);
2161                   }
2162                   status_clear(ptr);
2163               }
2164           }else{
2165               status_disable(ptr);
2166           }
2167           break;
2168       case 3:
2169         if (0x80 <= c && c <= 0xbf){
2170             if (ptr->index < ptr->stat){
2171                 status_push_ch(ptr, c);
2172             } else {
2173                 status_clear(ptr);
2174             }
2175           }else{
2176               status_disable(ptr);
2177           }
2178           break;
2179     }
2180 }
2181 #endif
2182
2183 void code_status(nkf_char c)
2184 {
2185     int action_flag = 1;
2186     struct input_code *result = 0;
2187     struct input_code *p = input_code_list;
2188     while (p->name){
2189         if (!p->status_func) {
2190             ++p;
2191             continue;
2192         }
2193         if (!p->status_func)
2194             continue;
2195         (p->status_func)(p, c);
2196         if (p->stat > 0){
2197             action_flag = 0;
2198         }else if(p->stat == 0){
2199             if (result){
2200                 action_flag = 0;
2201             }else{
2202                 result = p;
2203             }
2204         }
2205         ++p;
2206     }
2207
2208     if (action_flag){
2209         if (result && !estab_f){
2210             set_iconv(TRUE, result->iconv_func);
2211         }else if (c <= DEL){
2212             struct input_code *ptr = input_code_list;
2213             while (ptr->name){
2214                 status_reset(ptr);
2215                 ++ptr;
2216             }
2217         }
2218     }
2219 }
2220
2221 #ifndef WIN32DLL
2222 nkf_char std_getc(FILE *f)
2223 {
2224     if (std_gc_ndx){
2225         return std_gc_buf[--std_gc_ndx];
2226     }
2227     return getc(f);
2228 }
2229 #endif /*WIN32DLL*/
2230
2231 nkf_char std_ungetc(nkf_char c, FILE *f)
2232 {
2233     if (std_gc_ndx == STD_GC_BUFSIZE){
2234         return EOF;
2235     }
2236     std_gc_buf[std_gc_ndx++] = c;
2237     return c;
2238 }
2239
2240 #ifndef WIN32DLL
2241 void std_putc(nkf_char c)
2242 {
2243     if(c!=EOF)
2244       putchar(c);
2245 }
2246 #endif /*WIN32DLL*/
2247
2248 #if !defined(PERL_XS) && !defined(WIN32DLL)
2249 nkf_char noconvert(FILE *f)
2250 {
2251     nkf_char    c;
2252
2253     if (nop_f == 2)
2254         module_connection();
2255     while ((c = (*i_getc)(f)) != EOF)
2256       (*o_putc)(c);
2257     (*o_putc)(EOF);
2258     return 1;
2259 }
2260 #endif
2261
/*
 * Wire up the output and input filter chains according to the option flags.
 *
 * Output side: oconv starts as output_conv and o_putc as std_putc; each
 * enabled option saves the current oconv (or o_putc) in its own o_* hook
 * and installs its converter in front, so the order of the tests below
 * determines the nesting of the filters.
 * Input side: i_getc / i_ungetc start as std_getc / std_ungetc and are
 * wrapped the same way.
 * Finally the input converter is chosen from input_f and every detector
 * in input_code_list is re-initialised.
 */
2262 void module_connection(void)
2263 {
2264     oconv = output_conv;
2265     o_putc = std_putc;
2266
2267     /* replace continuation module, from output side */
2268
2269     /* output redirection */
2270 #ifdef CHECK_OPTION
         /* noout_f or guess_f: route output characters to no_putc */
2271     if (noout_f || guess_f){
2272         o_putc = no_putc;
2273     }
2274 #endif
2275     if (mimeout_f) {
2276         o_mputc = o_putc;
2277         o_putc = mime_putc;
             /* base64_conv is only inserted when mimeout_f is exactly TRUE */
2278         if (mimeout_f == TRUE) {
2279             o_base64conv = oconv; oconv = base64_conv;
2280         }
2281         /* base64_count = 0; */
2282     }
2283
2284     if (nlmode_f) {
2285         o_nlconv = oconv; oconv = nl_conv;
2286     }
2287     if (rot_f) {
2288         o_rot_conv = oconv; oconv = rot_conv;
2289     }
2290     if (iso2022jp_f) {
2291         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2292     }
2293     if (hira_f) {
2294         o_hira_conv = oconv; oconv = hira_conv;
2295     }
2296     if (fold_f) {
2297         o_fconv = oconv; oconv = fold_conv;
2298         f_line = 0;
2299     }
2300     if (alpha_f || x0201_f) {
2301         o_zconv = oconv; oconv = z_conv;
2302     }
2303
2304     i_getc = std_getc;
2305     i_ungetc = std_ungetc;
2306     /* input redirection */
2307 #ifdef INPUT_OPTION
2308     if (cap_f){
2309         i_cgetc = i_getc; i_getc = cap_getc;
2310         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2311     }
2312     if (url_f){
2313         i_ugetc = i_getc; i_getc = url_getc;
2314         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2315     }
2316 #endif
2317 #ifdef NUMCHAR_OPTION
2318     if (numchar_f){
2319         i_ngetc = i_getc; i_getc = numchar_getc;
2320         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2321     }
2322 #endif
2323 #ifdef UNICODE_NORMALIZATION
         /* nfc_getc is only inserted for explicit UTF-8 input */
2324     if (nfc_f && input_f == UTF8_INPUT){
2325         i_nfc_getc = i_getc; i_getc = nfc_getc;
2326         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2327     }
2328 #endif
2329     if (mime_f && mimebuf_f==FIXED_MIME) {
2330         i_mgetc = i_getc; i_getc = mime_getc;
2331         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2332     }
2333     if (broken_f & 1) {
2334         i_bgetc = i_getc; i_getc = broken_getc;
2335         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2336     }
         /*
          * An explicitly specified input code forces its converter
          * (-TRUE means "FORCE" in set_iconv); otherwise e_iconv is
          * installed with the established flag cleared.
          */
2337     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2338         set_iconv(-TRUE, e_iconv);
2339     } else if (input_f == SJIS_INPUT) {
2340         set_iconv(-TRUE, s_iconv);
2341 #ifdef UTF8_INPUT_ENABLE
2342     } else if (input_f == UTF8_INPUT) {
2343         set_iconv(-TRUE, w_iconv);
2344     } else if (input_f == UTF16_INPUT) {
2345         set_iconv(-TRUE, w_iconv16);
2346     } else if (input_f == UTF32_INPUT) {
2347         set_iconv(-TRUE, w_iconv32);
2348 #endif
2349     } else {
2350         set_iconv(FALSE, e_iconv);
2351     }
2352
         /* re-initialise every encoding detector */
2353     {
2354         struct input_code *p = input_code_list;
2355         while (p->name){
2356             status_reinit(p++);
2357         }
2358     }
2359 }
2360
2361 /*
2362  * Check and Ignore BOM
2363  */
2364 void check_bom(FILE *f)
2365 {
2366     int c2;
2367     switch(c2 = (*i_getc)(f)){
2368     case 0x00:
2369         if((c2 = (*i_getc)(f)) == 0x00){
2370             if((c2 = (*i_getc)(f)) == 0xFE){
2371                 if((c2 = (*i_getc)(f)) == 0xFF){
2372                     if(!input_f){
2373                         set_iconv(TRUE, w_iconv32);
2374                     }
2375                     if (iconv == w_iconv32) {
2376                         input_endian = ENDIAN_BIG;
2377                         return;
2378                     }
2379                     (*i_ungetc)(0xFF,f);
2380                 }else (*i_ungetc)(c2,f);
2381                 (*i_ungetc)(0xFE,f);
2382             }else if(c2 == 0xFF){
2383                 if((c2 = (*i_getc)(f)) == 0xFE){
2384                     if(!input_f){
2385                         set_iconv(TRUE, w_iconv32);
2386                     }
2387                     if (iconv == w_iconv32) {
2388                         input_endian = ENDIAN_2143;
2389                         return;
2390                     }
2391                     (*i_ungetc)(0xFF,f);
2392                 }else (*i_ungetc)(c2,f);
2393                 (*i_ungetc)(0xFF,f);
2394             }else (*i_ungetc)(c2,f);
2395             (*i_ungetc)(0x00,f);
2396         }else (*i_ungetc)(c2,f);
2397         (*i_ungetc)(0x00,f);
2398         break;
2399     case 0xEF:
2400         if((c2 = (*i_getc)(f)) == 0xBB){
2401             if((c2 = (*i_getc)(f)) == 0xBF){
2402                 if(!input_f){
2403                     set_iconv(TRUE, w_iconv);
2404                 }
2405                 if (iconv == w_iconv) {
2406                     return;
2407                 }
2408                 (*i_ungetc)(0xBF,f);
2409             }else (*i_ungetc)(c2,f);
2410             (*i_ungetc)(0xBB,f);
2411         }else (*i_ungetc)(c2,f);
2412         (*i_ungetc)(0xEF,f);
2413         break;
2414     case 0xFE:
2415         if((c2 = (*i_getc)(f)) == 0xFF){
2416             if((c2 = (*i_getc)(f)) == 0x00){
2417                 if((c2 = (*i_getc)(f)) == 0x00){
2418                     if(!input_f){
2419                         set_iconv(TRUE, w_iconv32);
2420                     }
2421                     if (iconv == w_iconv32) {
2422                         input_endian = ENDIAN_3412;
2423                         return;
2424                     }
2425                     (*i_ungetc)(0x00,f);
2426                 }else (*i_ungetc)(c2,f);
2427                 (*i_ungetc)(0x00,f);
2428             }else (*i_ungetc)(c2,f);
2429             if(!input_f){
2430                 set_iconv(TRUE, w_iconv16);
2431             }
2432             if (iconv == w_iconv16) {
2433                 input_endian = ENDIAN_BIG;
2434                 return;
2435             }
2436             (*i_ungetc)(0xFF,f);
2437         }else (*i_ungetc)(c2,f);
2438         (*i_ungetc)(0xFE,f);
2439         break;
2440     case 0xFF:
2441         if((c2 = (*i_getc)(f)) == 0xFE){
2442             if((c2 = (*i_getc)(f)) == 0x00){
2443                 if((c2 = (*i_getc)(f)) == 0x00){
2444                     if(!input_f){
2445                         set_iconv(TRUE, w_iconv32);
2446                     }
2447                     if (iconv == w_iconv32) {
2448                         input_endian = ENDIAN_LITTLE;
2449                         return;
2450                     }
2451                     (*i_ungetc)(0x00,f);
2452                 }else (*i_ungetc)(c2,f);
2453                 (*i_ungetc)(0x00,f);
2454             }else (*i_ungetc)(c2,f);
2455             if(!input_f){
2456                 set_iconv(TRUE, w_iconv16);
2457             }
2458             if (iconv == w_iconv16) {
2459                 input_endian = ENDIAN_LITTLE;
2460                 return;
2461             }
2462             (*i_ungetc)(0xFE,f);
2463         }else (*i_ungetc)(c2,f);
2464         (*i_ungetc)(0xFF,f);
2465         break;
2466     default:
2467         (*i_ungetc)(c2,f);
2468         break;
2469     }
2470 }
2471
2472 /*
2473    Conversion main loop. Code detection only.
2474  */
2475
2476 nkf_char kanji_convert(FILE *f)
2477 {
2478     nkf_char    c3, c2=0, c1, c0=0;
2479     int is_8bit = FALSE;
2480
2481     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2482 #ifdef UTF8_INPUT_ENABLE
2483        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2484 #endif
2485       ){
2486         is_8bit = TRUE;
2487     }
2488
2489     input_mode = ASCII;
2490     output_mode = ASCII;
2491     shift_mode = FALSE;
2492
2493 #define NEXT continue      /* no output, get next */
2494 #define SEND ;             /* output c1 and c2, get next */
2495 #define LAST break         /* end of loop, go closing  */
2496
2497     module_connection();
2498     check_bom(f);
2499
2500     while ((c1 = (*i_getc)(f)) != EOF) {
2501 #ifdef INPUT_CODE_FIX
2502         if (!input_f)
2503 #endif
2504             code_status(c1);
2505         if (c2) {
2506             /* second byte */
2507             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2508                 /* in case of 8th bit is on */
2509                 if (!estab_f&&!mime_decode_mode) {
2510                     /* in case of not established yet */
2511                     /* It is still ambiguious */
2512                     if (h_conv(f, c2, c1)==EOF)
2513                         LAST;
2514                     else
2515                         c2 = 0;
2516                     NEXT;
2517                 } else {
2518                     /* in case of already established */
2519                     if (c1 < AT) {
2520                         /* ignore bogus code and not CP5022x UCD */
2521                         c2 = 0;
2522                         NEXT;
2523                     } else {
2524                         SEND;
2525                     }
2526                 }
2527             } else
2528                 /* second byte, 7 bit code */
2529                 /* it might be kanji shitfted */
2530                 if ((c1 == DEL) || (c1 <= SP)) {
2531                     /* ignore bogus first code */
2532                     c2 = 0;
2533                     NEXT;
2534                 } else
2535                     SEND;
2536         } else {
2537             /* first byte */
2538 #ifdef UTF8_INPUT_ENABLE
2539             if (iconv == w_iconv16) {
2540                 if (input_endian == ENDIAN_BIG) {
2541                     c2 = c1;
2542                     if ((c1 = (*i_getc)(f)) != EOF) {
2543                         if (0xD8 <= c2 && c2 <= 0xDB) {
2544                             if ((c0 = (*i_getc)(f)) != EOF) {
2545                                 c0 <<= 8;
2546                                 if ((c3 = (*i_getc)(f)) != EOF) {
2547                                     c0 |= c3;
2548                                 } else c2 = EOF;
2549                             } else c2 = EOF;
2550                         }
2551                     } else c2 = EOF;
2552                 } else {
2553                     if ((c2 = (*i_getc)(f)) != EOF) {
2554                         if (0xD8 <= c2 && c2 <= 0xDB) {
2555                             if ((c3 = (*i_getc)(f)) != EOF) {
2556                                 if ((c0 = (*i_getc)(f)) != EOF) {
2557                                     c0 <<= 8;
2558                                     c0 |= c3;
2559                                 } else c2 = EOF;
2560                             } else c2 = EOF;
2561                         }
2562                     } else c2 = EOF;
2563                 }
2564                 SEND;
2565             } else if(iconv == w_iconv32){
2566                 int c3 = c1;
2567                 if((c2 = (*i_getc)(f)) != EOF &&
2568                    (c1 = (*i_getc)(f)) != EOF &&
2569                    (c0 = (*i_getc)(f)) != EOF){
2570                     switch(input_endian){
2571                     case ENDIAN_BIG:
2572                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2573                         break;
2574                     case ENDIAN_LITTLE:
2575                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2576                         break;
2577                     case ENDIAN_2143:
2578                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2579                         break;
2580                     case ENDIAN_3412:
2581                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2582                         break;
2583                     }
2584                     c2 = 0;
2585                 }else{
2586                     c2 = EOF;
2587                 }
2588                 SEND;
2589             } else
2590 #endif
2591 #ifdef NUMCHAR_OPTION
2592             if (is_unicode_capsule(c1)){
2593                 SEND;
2594             } else
2595 #endif
2596             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2597                 /* 8 bit code */
2598                 if (!estab_f && !iso8859_f) {
2599                     /* not established yet */
2600                     c2 = c1;
2601                     NEXT;
2602                 } else { /* estab_f==TRUE */
2603                     if (iso8859_f) {
2604                         c2 = ISO8859_1;
2605                         c1 &= 0x7f;
2606                         SEND;
2607                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2608                         /* SJIS X0201 Case... */
2609                         if(iso2022jp_f && x0201_f==NO_X0201) {
2610                             (*oconv)(GETA1, GETA2);
2611                             NEXT;
2612                         } else {
2613                             c2 = X0201;
2614                             c1 &= 0x7f;
2615                             SEND;
2616                         }
2617                     } else if (c1==SSO && iconv != s_iconv) {
2618                         /* EUC X0201 Case */
2619                         c1 = (*i_getc)(f);  /* skip SSO */
2620                         code_status(c1);
2621                         if (SSP<=c1 && c1<0xe0) {
2622                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2623                                 (*oconv)(GETA1, GETA2);
2624                                 NEXT;
2625                             } else {
2626                                 c2 = X0201;
2627                                 c1 &= 0x7f;
2628                                 SEND;
2629                             }
2630                         } else  { /* bogus code, skip SSO and one byte */
2631                             NEXT;
2632                         }
2633                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2634                                (c1 == 0xFD || c1 == 0xFE)) {
2635                         /* CP10001 */
2636                         c2 = X0201;
2637                         c1 &= 0x7f;
2638                         SEND;
2639                     } else {
2640                        /* already established */
2641                        c2 = c1;
2642                        NEXT;
2643                     }
2644                 }
2645             } else if ((c1 > SP) && (c1 != DEL)) {
2646                 /* in case of Roman characters */
2647                 if (shift_mode) {
2648                     /* output 1 shifted byte */
2649                     if (iso8859_f) {
2650                         c2 = ISO8859_1;
2651                         SEND;
2652                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2653                       /* output 1 shifted byte */
2654                         if(iso2022jp_f && x0201_f==NO_X0201) {
2655                             (*oconv)(GETA1, GETA2);
2656                             NEXT;
2657                         } else {
2658                             c2 = X0201;
2659                             SEND;
2660                         }
2661                     } else {
2662                         /* look like bogus code */
2663                         NEXT;
2664                     }
2665                 } else if (input_mode == X0208 || input_mode == X0212 ||
2666                            input_mode == X0213_1 || input_mode == X0213_2) {
2667                     /* in case of Kanji shifted */
2668                     c2 = c1;
2669                     NEXT;
2670                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2671                     /* Check MIME code */
2672                     if ((c1 = (*i_getc)(f)) == EOF) {
2673                         (*oconv)(0, '=');
2674                         LAST;
2675                     } else if (c1 == '?') {
2676                         /* =? is mime conversion start sequence */
2677                         if(mime_f == STRICT_MIME) {
2678                             /* check in real detail */
2679                             if (mime_begin_strict(f) == EOF)
2680                                 LAST;
2681                             else
2682                                 NEXT;
2683                         } else if (mime_begin(f) == EOF)
2684                             LAST;
2685                         else
2686                             NEXT;
2687                     } else {
2688                         (*oconv)(0, '=');
2689                         (*i_ungetc)(c1,f);
2690                         NEXT;
2691                     }
2692                 } else {
2693                     /* normal ASCII code */
2694                     SEND;
2695                 }
2696             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2697                 shift_mode = FALSE;
2698                 NEXT;
2699             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2700                 shift_mode = TRUE;
2701                 NEXT;
2702             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2703                 if ((c1 = (*i_getc)(f)) == EOF) {
2704                     /*  (*oconv)(0, ESC); don't send bogus code */
2705                     LAST;
2706                 } else if (c1 == '$') {
2707                     if ((c1 = (*i_getc)(f)) == EOF) {
2708                         /*
2709                         (*oconv)(0, ESC); don't send bogus code
2710                         (*oconv)(0, '$'); */
2711                         LAST;
2712                     } else if (c1 == '@'|| c1 == 'B') {
2713                         /* This is kanji introduction */
2714                         input_mode = X0208;
2715                         shift_mode = FALSE;
2716                         set_input_codename("ISO-2022-JP");
2717 #ifdef CHECK_OPTION
2718                         debug("ISO-2022-JP");
2719 #endif
2720                         NEXT;
2721                     } else if (c1 == '(') {
2722                         if ((c1 = (*i_getc)(f)) == EOF) {
2723                             /* don't send bogus code
2724                             (*oconv)(0, ESC);
2725                             (*oconv)(0, '$');
2726                             (*oconv)(0, '(');
2727                                 */
2728                             LAST;
2729                         } else if (c1 == '@'|| c1 == 'B') {
2730                             /* This is kanji introduction */
2731                             input_mode = X0208;
2732                             shift_mode = FALSE;
2733                             NEXT;
2734 #ifdef X0212_ENABLE
2735                         } else if (c1 == 'D'){
2736                             input_mode = X0212;
2737                             shift_mode = FALSE;
2738                             NEXT;
2739 #endif /* X0212_ENABLE */
2740                         } else if (c1 == (X0213_1&0x7F)){
2741                             input_mode = X0213_1;
2742                             shift_mode = FALSE;
2743                             NEXT;
2744                         } else if (c1 == (X0213_2&0x7F)){
2745                             input_mode = X0213_2;
2746                             shift_mode = FALSE;
2747                             NEXT;
2748                         } else {
2749                             /* could be some special code */
2750                             (*oconv)(0, ESC);
2751                             (*oconv)(0, '$');
2752                             (*oconv)(0, '(');
2753                             (*oconv)(0, c1);
2754                             NEXT;
2755                         }
2756                     } else if (broken_f&0x2) {
2757                         /* accept any ESC-(-x as broken code ... */
2758                         input_mode = X0208;
2759                         shift_mode = FALSE;
2760                         NEXT;
2761                     } else {
2762                         (*oconv)(0, ESC);
2763                         (*oconv)(0, '$');
2764                         (*oconv)(0, c1);
2765                         NEXT;
2766                     }
2767                 } else if (c1 == '(') {
2768                     if ((c1 = (*i_getc)(f)) == EOF) {
2769                         /* don't send bogus code
2770                         (*oconv)(0, ESC);
2771                         (*oconv)(0, '('); */
2772                         LAST;
2773                     } else {
2774                         if (c1 == 'I') {
2775                             /* This is X0201 kana introduction */
2776                             input_mode = X0201; shift_mode = X0201;
2777                             NEXT;
2778                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2779                             /* This is X0208 kanji introduction */
2780                             input_mode = ASCII; shift_mode = FALSE;
2781                             NEXT;
2782                         } else if (broken_f&0x2) {
2783                             input_mode = ASCII; shift_mode = FALSE;
2784                             NEXT;
2785                         } else {
2786                             (*oconv)(0, ESC);
2787                             (*oconv)(0, '(');
2788                             /* maintain various input_mode here */
2789                             SEND;
2790                         }
2791                     }
2792                } else if ( c1 == 'N' || c1 == 'n'){
2793                    /* SS2 */
2794                    c3 = (*i_getc)(f);  /* skip SS2 */
2795                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2796                        c1 = c3;
2797                        c2 = X0201;
2798                        SEND;
2799                    }else{
2800                        (*i_ungetc)(c3, f);
2801                        /* lonely ESC  */
2802                        (*oconv)(0, ESC);
2803                        SEND;
2804                    }
2805                 } else {
2806                     /* lonely ESC  */
2807                     (*oconv)(0, ESC);
2808                     SEND;
2809                 }
2810             } else if (c1 == ESC && iconv == s_iconv) {
2811                 /* ESC in Shift_JIS */
2812                 if ((c1 = (*i_getc)(f)) == EOF) {
2813                     /*  (*oconv)(0, ESC); don't send bogus code */
2814                     LAST;
2815                 } else if (c1 == '$') {
2816                     /* J-PHONE emoji */
2817                     if ((c1 = (*i_getc)(f)) == EOF) {
2818                         /*
2819                            (*oconv)(0, ESC); don't send bogus code
2820                            (*oconv)(0, '$'); */
2821                         LAST;
2822                     } else {
2823                         if (('E' <= c1 && c1 <= 'G') ||
2824                             ('O' <= c1 && c1 <= 'Q')) {
2825                             /*
2826                                NUM : 0 1 2 3 4 5
2827                                BYTE: G E F O P Q
2828                                C%7 : 1 6 0 2 3 4
2829                                C%7 : 0 1 2 3 4 5 6
2830                                NUM : 2 0 3 4 5 X 1
2831                              */
2832                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2833                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2834                             while ((c1 = (*i_getc)(f)) != EOF) {
2835                                 if (SP <= c1 && c1 <= 'z') {
2836                                     (*oconv)(0, c1 + c0);
2837                                 } else break; /* c1 == SO */
2838                             }
2839                         }
2840                     }
2841                     if (c1 == EOF) LAST;
2842                     NEXT;
2843                 } else {
2844                     /* lonely ESC  */
2845                     (*oconv)(0, ESC);
2846                     SEND;
2847                 }
2848             } else if (c1 == LF || c1 == CR) {
2849                 if (broken_f&4) {
2850                     input_mode = ASCII; set_iconv(FALSE, 0);
2851                     SEND;
2852                 } else if (mime_decode_f && !mime_decode_mode){
2853                     if (c1 == LF) {
2854                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2855                             i_ungetc(SP,f);
2856                             continue;
2857                         } else {
2858                             i_ungetc(c1,f);
2859                         }
2860                         c1 = LF;
2861                         SEND;
2862                     } else  { /* if (c1 == CR)*/
2863                         if ((c1=(*i_getc)(f))!=EOF) {
2864                             if (c1==SP) {
2865                                 i_ungetc(SP,f);
2866                                 continue;
2867                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2868                                 i_ungetc(SP,f);
2869                                 continue;
2870                             } else {
2871                                 i_ungetc(c1,f);
2872                             }
2873                             i_ungetc(LF,f);
2874                         } else {
2875                             i_ungetc(c1,f);
2876                         }
2877                         c1 = CR;
2878                         SEND;
2879                     }
2880                 }
2881                 if (!nlmode_f) {
2882                     if (prev_cr && c1 == LF) nlmode_f = CRLF;
2883                     else nlmode_f = c1;
2884                 }
2885             } else if (c1 == DEL && input_mode == X0208) {
2886                 /* CP5022x */
2887                 c2 = c1;
2888                 NEXT;
2889             } else
2890                 SEND;
2891         }
2892         /* send: */
2893         switch(input_mode){
2894         case ASCII:
2895             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2896             case -2:
2897                 /* 4 bytes UTF-8 */
2898                 if ((c0 = (*i_getc)(f)) != EOF) {
2899                     code_status(c0);
2900                     c0 <<= 8;
2901                     if ((c3 = (*i_getc)(f)) != EOF) {
2902                         code_status(c3);
2903                         (*iconv)(c2, c1, c0|c3);
2904                     }
2905                 }
2906                 break;
2907             case -1:
2908                 /* 3 bytes EUC or UTF-8 */
2909                 if ((c0 = (*i_getc)(f)) != EOF) {
2910                     code_status(c0);
2911                     (*iconv)(c2, c1, c0);
2912                 }
2913                 break;
2914             }
2915             break;
2916         case X0208:
2917         case X0213_1:
2918             if (ms_ucs_map_f &&
2919                 0x7F <= c2 && c2 <= 0x92 &&
2920                 0x21 <= c1 && c1 <= 0x7E) {
2921                 /* CP932 UDC */
2922                 if(c1 == 0x7F) return 0;
2923                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2924                 c2 = 0;
2925             }
2926             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2927             break;
2928 #ifdef X0212_ENABLE
2929         case X0212:
2930             (*oconv)(PREFIX_EUCG3 | c2, c1);
2931             break;
2932 #endif /* X0212_ENABLE */
2933         case X0213_2:
2934             (*oconv)(PREFIX_EUCG3 | c2, c1);
2935             break;
2936         default:
2937             (*oconv)(input_mode, c1);  /* other special case */
2938         }
2939
2940         c2 = 0;
2941         c0 = 0;
2942         continue;
2943         /* goto next_word */
2944     }
2945
2946     /* epilogue */
2947     (*iconv)(EOF, 0, 0);
2948     if (!input_codename)
2949     {
2950         if (is_8bit) {
2951             struct input_code *p = input_code_list;
2952             struct input_code *result = p;
2953             while (p->name){
2954                 if (p->score < result->score) result = p;
2955                 ++p;
2956             }
2957             set_input_codename(result->name);
2958 #ifdef CHECK_OPTION
2959             debug(result->name);
2960 #endif
2961         }
2962     }
2963     return 1;
2964 }
2965
2966 nkf_char
2967 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2968 {
2969     nkf_char ret, c3, c0;
2970     int hold_index;
2971
2972
2973     /** it must NOT be in the kanji shifte sequence      */
2974     /** it must NOT be written in JIS7                   */
2975     /** and it must be after 2 byte 8bit code            */
2976
2977     hold_count = 0;
2978     push_hold_buf(c2);
2979     push_hold_buf(c1);
2980
2981     while ((c1 = (*i_getc)(f)) != EOF) {
2982         if (c1 == ESC){
2983             (*i_ungetc)(c1,f);
2984             break;
2985         }
2986         code_status(c1);
2987         if (push_hold_buf(c1) == EOF || estab_f){
2988             break;
2989         }
2990     }
2991
2992     if (!estab_f){
2993         struct input_code *p = input_code_list;
2994         struct input_code *result = p;
2995         if (c1 == EOF){
2996             code_status(c1);
2997         }
2998         while (p->name){
2999             if (p->status_func && p->score < result->score){
3000                 result = p;
3001             }
3002             ++p;
3003         }
3004         set_iconv(TRUE, result->iconv_func);
3005     }
3006
3007
3008     /** now,
3009      ** 1) EOF is detected, or
3010      ** 2) Code is established, or
3011      ** 3) Buffer is FULL (but last word is pushed)
3012      **
3013      ** in 1) and 3) cases, we continue to use
3014      ** Kanji codes by oconv and leave estab_f unchanged.
3015      **/
3016
3017     ret = c1;
3018     hold_index = 0;
3019     while (hold_index < hold_count){
3020         c2 = hold_buf[hold_index++];
3021         if (c2 <= DEL
3022 #ifdef NUMCHAR_OPTION
3023             || is_unicode_capsule(c2)
3024 #endif
3025             ){
3026             (*iconv)(0, c2, 0);
3027             continue;
3028         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3029             (*iconv)(X0201, c2, 0);
3030             continue;
3031         }
3032         if (hold_index < hold_count){
3033             c1 = hold_buf[hold_index++];
3034         }else{
3035             c1 = (*i_getc)(f);
3036             if (c1 == EOF){
3037                 c3 = EOF;
3038                 break;
3039             }
3040             code_status(c1);
3041         }
3042         c0 = 0;
3043         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3044         case -2:
3045             /* 4 bytes UTF-8 */
3046             if (hold_index < hold_count){
3047                 c0 = hold_buf[hold_index++];
3048             } else if ((c0 = (*i_getc)(f)) == EOF) {
3049                 ret = EOF;
3050                 break;
3051             } else {
3052                 code_status(c0);
3053                 c0 <<= 8;
3054                 if (hold_index < hold_count){
3055                     c3 = hold_buf[hold_index++];
3056                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3057                     c0 = ret = EOF;
3058                     break;
3059                 } else {
3060                     code_status(c3);
3061                     (*iconv)(c2, c1, c0|c3);
3062                 }
3063             }
3064             break;
3065         case -1:
3066             /* 3 bytes EUC or UTF-8 */
3067             if (hold_index < hold_count){
3068                 c0 = hold_buf[hold_index++];
3069             } else if ((c0 = (*i_getc)(f)) == EOF) {
3070                 ret = EOF;
3071                 break;
3072             } else {
3073                 code_status(c0);
3074             }
3075             (*iconv)(c2, c1, c0);
3076             break;
3077         }
3078         if (c0 == EOF) break;
3079     }
3080     return ret;
3081 }
3082
3083 nkf_char push_hold_buf(nkf_char c2)
3084 {
3085     if (hold_count >= HOLD_SIZE*2)
3086         return (EOF);
3087     hold_buf[hold_count++] = (unsigned char)c2;
3088     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3089 }
3090
3091 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3092 {
3093 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3094     nkf_char val;
3095 #endif
3096     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3097 #ifdef SHIFTJIS_CP932
3098     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3099         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3100         if (val){
3101             c2 = val >> 8;
3102             c1 = val & 0xff;
3103         }
3104     }
3105     if (cp932inv_f
3106         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3107         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3108         if (c){
3109             c2 = c >> 8;
3110             c1 = c & 0xff;
3111         }
3112     }
3113 #endif /* SHIFTJIS_CP932 */
3114 #ifdef X0212_ENABLE
3115     if (!x0213_f && is_ibmext_in_sjis(c2)){
3116         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3117         if (val){
3118             if (val > 0x7FFF){
3119                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3120                 c1 = val & 0xff;
3121             }else{
3122                 c2 = val >> 8;
3123                 c1 = val & 0xff;
3124             }
3125             if (p2) *p2 = c2;
3126             if (p1) *p1 = c1;
3127             return 0;
3128         }
3129     }
3130 #endif
3131     if(c2 >= 0x80){
3132         if(x0213_f && c2 >= 0xF0){
3133             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3134                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3135             }else{ /* 78<=k<=94 */
3136                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3137                 if (0x9E < c1) c2++;
3138             }
3139         }else{
3140             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3141             if (0x9E < c1) c2++;
3142         }
3143         if (c1 < 0x9F)
3144             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3145         else {
3146             c1 = c1 - 0x7E;
3147         }
3148     }
3149
3150 #ifdef X0212_ENABLE
3151     c2 = x0212_unshift(c2);
3152 #endif
3153     if (p2) *p2 = c2;
3154     if (p1) *p1 = c1;
3155     return 0;
3156 }
3157
3158 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3159 {
3160     if (c2 == X0201) {
3161         c1 &= 0x7f;
3162     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3163         /* NOP */
3164     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3165         /* CP932 UDC */
3166         if(c1 == 0x7F) return 0;
3167         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3168         c2 = 0;
3169     } else {
3170         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3171         if (ret) return ret;
3172     }
3173     (*oconv)(c2, c1);
3174     return 0;
3175 }
3176
3177 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3178 {
3179     if (c2 == X0201) {
3180         c1 &= 0x7f;
3181 #ifdef X0212_ENABLE
3182     }else if (c2 == 0x8f){
3183         if (c0 == 0){
3184             return -1;
3185         }
3186         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3187             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3188             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3189             c2 = 0;
3190         } else {
3191             c2 = (c2 << 8) | (c1 & 0x7f);
3192             c1 = c0 & 0x7f;
3193 #ifdef SHIFTJIS_CP932
3194             if (cp51932_f){
3195                 nkf_char s2, s1;
3196                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3197                     s2e_conv(s2, s1, &c2, &c1);
3198                     if (c2 < 0x100){
3199                         c1 &= 0x7f;
3200                         c2 &= 0x7f;
3201                     }
3202                 }
3203             }
3204 #endif /* SHIFTJIS_CP932 */
3205         }
3206 #endif /* X0212_ENABLE */
3207     } else if (c2 == SSO){
3208         c2 = X0201;
3209         c1 &= 0x7f;
3210     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3211         /* NOP */
3212     } else {
3213         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3214             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3215             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3216             c2 = 0;
3217         } else {
3218             c1 &= 0x7f;
3219             c2 &= 0x7f;
3220 #ifdef SHIFTJIS_CP932
3221             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3222                 nkf_char s2, s1;
3223                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3224                     s2e_conv(s2, s1, &c2, &c1);
3225                     if (c2 < 0x100){
3226                         c1 &= 0x7f;
3227                         c2 &= 0x7f;
3228                     }
3229                 }
3230             }
3231 #endif /* SHIFTJIS_CP932 */
3232         }
3233     }
3234     (*oconv)(c2, c1);
3235     return 0;
3236 }
3237
3238 #ifdef UTF8_INPUT_ENABLE
3239 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3240 {
3241     nkf_char ret = 0;
3242
3243     if (!c1){
3244         *p2 = 0;
3245         *p1 = c2;
3246     }else if (0xc0 <= c2 && c2 <= 0xef) {
3247         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3248 #ifdef NUMCHAR_OPTION
3249         if (ret > 0){
3250             if (p2) *p2 = 0;
3251             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3252             ret = 0;
3253         }
3254 #endif
3255     }
3256     return ret;
3257 }
3258
3259 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3260 {
3261     nkf_char ret = 0;
3262     static const char w_iconv_utf8_1st_byte[] =
3263     { /* 0xC0 - 0xFF */
3264         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3265         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3266         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3267         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3268
3269     if (c2 < 0 || 0xff < c2) {
3270     }else if (c2 == 0) { /* 0 : 1 byte*/
3271         c0 = 0;
3272     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3273         return 0;
3274     } else{
3275         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3276         case 21:
3277             if (c1 < 0x80 || 0xBF < c1) return 0;
3278             break;
3279         case 30:
3280             if (c0 == 0) return -1;
3281             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3282                 return 0;
3283             break;
3284         case 31:
3285         case 33:
3286             if (c0 == 0) return -1;
3287             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3288                 return 0;
3289             break;
3290         case 32:
3291             if (c0 == 0) return -1;
3292             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3293                 return 0;
3294             break;
3295         case 40:
3296             if (c0 == 0) return -2;
3297             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3298                 return 0;
3299             break;
3300         case 41:
3301             if (c0 == 0) return -2;
3302             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3303                 return 0;
3304             break;
3305         case 42:
3306             if (c0 == 0) return -2;
3307             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3308                 return 0;
3309             break;
3310         default:
3311             return 0;
3312             break;
3313         }
3314     }
3315     if (c2 == 0 || c2 == EOF){
3316     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3317         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3318         c2 = 0;
3319     } else {
3320         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3321     }
3322     if (ret == 0){
3323         (*oconv)(c2, c1);
3324     }
3325     return ret;
3326 }
3327 #endif
3328
3329 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3330 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3331 {
3332     val &= VALUE_MASK;
3333     if (val < 0x80){
3334         *p2 = val;
3335         *p1 = 0;
3336         *p0 = 0;
3337     }else if (val < 0x800){
3338         *p2 = 0xc0 | (val >> 6);
3339         *p1 = 0x80 | (val & 0x3f);
3340         *p0 = 0;
3341     } else if (val <= NKF_INT32_C(0xFFFF)) {
3342         *p2 = 0xe0 | (val >> 12);
3343         *p1 = 0x80 | ((val >> 6) & 0x3f);
3344         *p0 = 0x80 | (val        & 0x3f);
3345     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3346         *p2 = 0xe0 |  (val >> 16);
3347         *p1 = 0x80 | ((val >> 12) & 0x3f);
3348         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3349     } else {
3350         *p2 = 0;
3351         *p1 = 0;
3352         *p0 = 0;
3353     }
3354 }
3355 #endif
3356
3357 #ifdef UTF8_INPUT_ENABLE
3358 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3359 {
3360     nkf_char val;
3361     if (c2 >= 0xf8) {
3362         val = -1;
3363     } else if (c2 >= 0xf0){
3364         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3365         val = (c2 & 0x0f) << 18;
3366         val |= (c1 & 0x3f) << 12;
3367         val |= (c0 & 0x3f00) >> 2;
3368         val |= (c0 & 0x3f);
3369     }else if (c2 >= 0xe0){
3370         val = (c2 & 0x0f) << 12;
3371         val |= (c1 & 0x3f) << 6;
3372         val |= (c0 & 0x3f);
3373     }else if (c2 >= 0xc0){
3374         val = (c2 & 0x1f) << 6;
3375         val |= (c1 & 0x3f);
3376     }else{
3377         val = c2;
3378     }
3379     return val;
3380 }
3381
3382 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3383 {
3384     nkf_char c2, c1, c0;
3385     nkf_char ret = 0;
3386     val &= VALUE_MASK;
3387     if (val < 0x80){
3388         *p2 = 0;
3389         *p1 = val;
3390     }else{
3391         w16w_conv(val, &c2, &c1, &c0);
3392         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3393 #ifdef NUMCHAR_OPTION
3394         if (ret > 0){
3395             *p2 = 0;
3396             *p1 = CLASS_UNICODE | val;
3397             ret = 0;
3398         }
3399 #endif
3400     }
3401     return ret;
3402 }
3403 #endif
3404
3405 #ifdef UTF8_INPUT_ENABLE
3406 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3407 {
3408     nkf_char ret = 0;
3409     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3410         (*oconv)(c2, c1);
3411         return 0;
3412     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3413         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3414             return -2;
3415         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3416         c2 = 0;
3417     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3418         /*
3419            return 2;
3420         */
3421         return 1;
3422     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3423     if (ret) return ret;
3424     (*oconv)(c2, c1);
3425     return 0;
3426 }
3427
3428 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3429 {
3430     int ret = 0;
3431
3432     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3433     } else if (is_unicode_bmp(c1)) {
3434         ret = w16e_conv(c1, &c2, &c1);
3435     } else {
3436         c2 = 0;
3437         c1 =  CLASS_UNICODE | c1;
3438     }
3439     if (ret) return ret;
3440     (*oconv)(c2, c1);
3441     return 0;
3442 }
3443
3444 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3445 {
3446     const unsigned short *const *pp;
3447     const unsigned short *const *const *ppp;
3448     static const char no_best_fit_chars_table_C2[] =
3449     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3450         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3451         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3452         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3453     static const char no_best_fit_chars_table_C2_ms[] =
3454     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3455         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3456         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3457         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3458     static const char no_best_fit_chars_table_932_C2[] =
3459     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3460         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3461         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3462         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3463     static const char no_best_fit_chars_table_932_C3[] =
3464     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3465         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3466         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3467         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3468     nkf_char ret = 0;
3469
3470     if(c2 < 0x80){
3471         *p2 = 0;
3472         *p1 = c2;
3473     }else if(c2 < 0xe0){
3474         if(no_best_fit_chars_f){
3475             if(ms_ucs_map_f == UCS_MAP_CP932){
3476                 switch(c2){
3477                 case 0xC2:
3478                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3479                     break;
3480                 case 0xC3:
3481                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3482                     break;
3483                 }
3484             }else if(!cp932inv_f){
3485                 switch(c2){
3486                 case 0xC2:
3487                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3488                     break;
3489                 case 0xC3:
3490                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3491                     break;
3492                 }
3493             }else if(ms_ucs_map_f == UCS_MAP_MS){
3494                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3495             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3496                 switch(c2){
3497                 case 0xC2:
3498                     switch(c1){
3499                     case 0xA2:
3500                     case 0xA3:
3501                     case 0xA5:
3502                     case 0xA6:
3503                     case 0xAC:
3504                     case 0xAF:
3505                     case 0xB8:
3506                         return 1;
3507                     }
3508                     break;
3509                 }
3510             }
3511         }
3512         pp =
3513             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3514             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3515             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3516             utf8_to_euc_2bytes;
3517         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3518     }else if(c0 < 0xF0){
3519         if(no_best_fit_chars_f){
3520             if(ms_ucs_map_f == UCS_MAP_CP932){
3521                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3522             }else if(ms_ucs_map_f == UCS_MAP_MS){
3523                 switch(c2){
3524                 case 0xE2:
3525                     switch(c1){
3526                     case 0x80:
3527                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3528                         break;
3529                     case 0x88:
3530                         if(c0 == 0x92) return 1;
3531                         break;
3532                     }
3533                     break;
3534                 case 0xE3:
3535                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3536                     break;
3537                 }
3538             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3539                 switch(c2){
3540                 case 0xE3:
3541                     switch(c1){
3542                     case 0x82:
3543                             if(c0 == 0x94) return 1;
3544                         break;
3545                     case 0x83:
3546  &nb